diff options
Diffstat (limited to 'src')
99 files changed, 3219 insertions, 2245 deletions
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index aab3e979a..f80ab92e4 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -38,7 +38,7 @@ Stream::Stream(Core::Timing::CoreTiming& core_timing, u32 sample_rate, Format fo sink_stream{sink_stream}, core_timing{core_timing}, name{std::move(name_)} { release_event = Core::Timing::CreateEvent( - name, [this](u64 userdata, s64 cycles_late) { ReleaseActiveBuffer(cycles_late); }); + name, [this](u64, std::chrono::nanoseconds ns_late) { ReleaseActiveBuffer(ns_late); }); } void Stream::Play() { @@ -59,11 +59,9 @@ Stream::State Stream::GetState() const { return state; } -s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const { +std::chrono::nanoseconds Stream::GetBufferReleaseNS(const Buffer& buffer) const { const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; - const auto ns = - std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate); - return ns.count(); + return std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate); } static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { @@ -80,7 +78,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { } } -void Stream::PlayNextBuffer(s64 cycles_late) { +void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) { if (!IsPlaying()) { // Ensure we are in playing state before playing the next buffer sink_stream.Flush(); @@ -105,17 +103,18 @@ void Stream::PlayNextBuffer(s64 cycles_late) { sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); - core_timing.ScheduleEvent( - GetBufferReleaseNS(*active_buffer) - - (Settings::values.enable_audio_stretching.GetValue() ? 0 : cycles_late), - release_event, {}); + const auto time_stretch_delta = Settings::values.enable_audio_stretching.GetValue() + ? std::chrono::nanoseconds::zero() + : ns_late; + const auto future_time = GetBufferReleaseNS(*active_buffer) - time_stretch_delta; + core_timing.ScheduleEvent(future_time, release_event, {}); } -void Stream::ReleaseActiveBuffer(s64 cycles_late) { +void Stream::ReleaseActiveBuffer(std::chrono::nanoseconds ns_late) { ASSERT(active_buffer); released_buffers.push(std::move(active_buffer)); release_callback(); - PlayNextBuffer(cycles_late); + PlayNextBuffer(ns_late); } bool Stream::QueueBuffer(BufferPtr&& buffer) { diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index 524376257..6437b8591 100644 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h @@ -4,6 +4,7 @@ #pragma once +#include <chrono> #include <functional> #include <memory> #include <string> @@ -90,16 +91,13 @@ public: private: /// Plays the next queued buffer in the audio stream, starting playback if necessary - void PlayNextBuffer(s64 cycles_late = 0); + void PlayNextBuffer(std::chrono::nanoseconds ns_late = {}); /// Releases the actively playing buffer, signalling that it has been completed - void ReleaseActiveBuffer(s64 cycles_late = 0); + void ReleaseActiveBuffer(std::chrono::nanoseconds ns_late = {}); /// Gets the number of core cycles when the specified buffer will be released - s64 GetBufferReleaseNS(const Buffer& buffer) const; - - /// Gets the number of core cycles when the specified buffer will be released - s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const; + std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const; u32 sample_rate; ///< Sample rate of the stream Format format; ///< Format of the stream diff --git a/src/common/alignment.h b/src/common/alignment.h index b37044bb6..ef4d6f896 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -3,7 +3,7 @@ #pragma once #include <cstddef> -#include <memory> +#include <new> #include <type_traits> namespace Common { @@ -54,66 +54,28 @@ public: using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using pointer = T*; - using const_pointer = const T*; - - using reference = T&; - using const_reference = const T&; - using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; -public: constexpr AlignmentAllocator() noexcept = default; template <typename T2> constexpr AlignmentAllocator(const AlignmentAllocator<T2, Align>&) noexcept {} - pointer address(reference r) noexcept { - return std::addressof(r); - } - - const_pointer address(const_reference r) const noexcept { - return std::addressof(r); - } - - pointer allocate(size_type n) { - return static_cast<pointer>(::operator new (n, std::align_val_t{Align})); - } - - void deallocate(pointer p, size_type) { - ::operator delete (p, std::align_val_t{Align}); + T* allocate(size_type n) { + return static_cast<T*>(::operator new (n * sizeof(T), std::align_val_t{Align})); } - void construct(pointer p, const value_type& wert) { - new (p) value_type(wert); - } - - void destroy(pointer p) { - p->~value_type(); - } - - size_type max_size() const noexcept { - return size_type(-1) / sizeof(value_type); + void deallocate(T* p, size_type n) { + ::operator delete (p, n * sizeof(T), std::align_val_t{Align}); } template <typename T2> struct rebind { using other = AlignmentAllocator<T2, Align>; }; - - bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept { - return !(*this == other); - } - - // Returns true if and only if storage allocated from *this - // can be deallocated from other, and vice versa. - // Always returns true for stateless allocators. - bool operator==(const AlignmentAllocator<T, Align>& other) const noexcept { - return true; - } }; } // namespace Common diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index a63e60461..b5feb3f24 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -53,12 +53,12 @@ void CoreTiming::ThreadEntry(CoreTiming& instance) { instance.ThreadLoop(); } -void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) { +void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) { on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; shutting_down = false; ticks = 0; - const auto empty_timed_callback = [](u64, s64) {}; + const auto empty_timed_callback = [](u64, std::chrono::nanoseconds) {}; ev_lost = CreateEvent("_lost_event", empty_timed_callback); if (is_multicore) { timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); @@ -106,11 +106,11 @@ bool CoreTiming::HasPendingEvents() const { return !(wait_set && event_queue.empty()); } -void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type, - u64 userdata) { +void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future, + const std::shared_ptr<EventType>& event_type, u64 userdata) { { std::scoped_lock scope{basic_lock}; - const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future); + const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count()); event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); @@ -195,8 +195,9 @@ std::optional<s64> CoreTiming::Advance() { event_queue.pop_back(); basic_lock.unlock(); - if (auto event_type{evt.type.lock()}) { - event_type->callback(evt.userdata, global_timer - evt.time); + if (const auto event_type{evt.type.lock()}) { + event_type->callback( + evt.userdata, std::chrono::nanoseconds{static_cast<s64>(global_timer - evt.time)}); } basic_lock.lock(); diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 72faaab64..120c74e46 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -17,14 +17,12 @@ #include "common/common_types.h" #include "common/spin_lock.h" #include "common/thread.h" -#include "common/threadsafe_queue.h" #include "common/wall_clock.h" -#include "core/hardware_properties.h" namespace Core::Timing { /// A callback that may be scheduled for a particular core timing event. -using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>; +using TimedCallback = std::function<void(u64 userdata, std::chrono::nanoseconds ns_late)>; /// Contains the characteristics of a particular event. struct EventType { @@ -42,12 +40,12 @@ struct EventType { * in main CPU clock cycles. * * To schedule an event, you first have to register its type. This is where you pass in the - * callback. You then schedule events using the type id you get back. + * callback. You then schedule events using the type ID you get back. * - * The int cyclesLate that the callbacks get is how many cycles late it was. + * The s64 ns_late that the callbacks get is how many ns late it was. * So to schedule a new event on a regular basis: * inside callback: - * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever") + * ScheduleEvent(period_in_ns - ns_late, callback, "whatever") */ class CoreTiming { public: @@ -62,7 +60,7 @@ public: /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is /// required to end slice - 1 and start slice 0 before the first cycle of code is executed. - void Initialize(std::function<void(void)>&& on_thread_init_); + void Initialize(std::function<void()>&& on_thread_init_); /// Tears down all timing related functionality. void Shutdown(); @@ -95,8 +93,8 @@ public: bool HasPendingEvents() const; /// Schedules an event in core timing - void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type, - u64 userdata = 0); + void ScheduleEvent(std::chrono::nanoseconds ns_into_future, + const std::shared_ptr<EventType>& event_type, u64 userdata = 0); void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata); @@ -141,8 +139,6 @@ private: u64 global_timer = 0; - std::chrono::nanoseconds start_point; - // The queue is a min-heap using std::make_heap/push_heap/pop_heap. // We don't use std::priority_queue because we need to be able to serialize, unserialize and // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't @@ -161,7 +157,7 @@ private: std::atomic<bool> wait_set{}; std::atomic<bool> shutting_down{}; std::atomic<bool> has_started{}; - std::function<void(void)> on_thread_init{}; + std::function<void()> on_thread_init{}; bool is_multicore{}; diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp index d126ae8dd..2aff2708a 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.cpp +++ b/src/core/file_sys/fsmitm_romfsbuild.cpp @@ -240,7 +240,7 @@ RomFSBuildContext::RomFSBuildContext(VirtualDir base_, VirtualDir ext_) RomFSBuildContext::~RomFSBuildContext() = default; -std::map<u64, VirtualFile> RomFSBuildContext::Build() { +std::multimap<u64, VirtualFile> RomFSBuildContext::Build() { const u64 dir_hash_table_entry_count = romfs_get_hash_table_count(num_dirs); const u64 file_hash_table_entry_count = romfs_get_hash_table_count(num_files); dir_hash_table_size = 4 * dir_hash_table_entry_count; @@ -294,7 +294,7 @@ std::map<u64, VirtualFile> RomFSBuildContext::Build() { cur_dir->parent->child = cur_dir; } - std::map<u64, VirtualFile> out; + std::multimap<u64, VirtualFile> out; // Populate file tables. for (const auto& it : files) { diff --git a/src/core/file_sys/fsmitm_romfsbuild.h b/src/core/file_sys/fsmitm_romfsbuild.h index a62502193..049de180b 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.h +++ b/src/core/file_sys/fsmitm_romfsbuild.h @@ -43,7 +43,7 @@ public: ~RomFSBuildContext(); // This finalizes the context. - std::map<u64, VirtualFile> Build(); + std::multimap<u64, VirtualFile> Build(); private: VirtualDir base; diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index 16d801c0c..e0ff70174 100644 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -11,7 +11,7 @@ namespace FileSys { -static bool VerifyConcatenationMapContinuity(const std::map<u64, VirtualFile>& map) { +static bool VerifyConcatenationMapContinuity(const std::multimap<u64, VirtualFile>& map) { const auto last_valid = --map.end(); for (auto iter = map.begin(); iter != last_valid;) { const auto old = iter++; @@ -27,12 +27,12 @@ ConcatenatedVfsFile::ConcatenatedVfsFile(std::vector<VirtualFile> files_, std::s : name(std::move(name)) { std::size_t next_offset = 0; for (const auto& file : files_) { - files[next_offset] = file; + files.emplace(next_offset, file); next_offset += file->GetSize(); } } -ConcatenatedVfsFile::ConcatenatedVfsFile(std::map<u64, VirtualFile> files_, std::string name) +ConcatenatedVfsFile::ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files_, std::string name) : files(std::move(files_)), name(std::move(name)) { ASSERT(VerifyConcatenationMapContinuity(files)); } @@ -50,7 +50,7 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> f } VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, - std::map<u64, VirtualFile> files, + std::multimap<u64, VirtualFile> files, std::string name) { if (files.empty()) return nullptr; diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index c90f9d5d1..7a26343c0 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -15,7 +15,7 @@ namespace FileSys { // read-only. class ConcatenatedVfsFile : public VfsFile { ConcatenatedVfsFile(std::vector<VirtualFile> files, std::string name); - ConcatenatedVfsFile(std::map<u64, VirtualFile> files, std::string name); + ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files, std::string name); public: ~ConcatenatedVfsFile() override; @@ -25,7 +25,7 @@ public: /// Convenience function that turns a map of offsets to files into a concatenated file, filling /// gaps with a given filler byte. - static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::map<u64, VirtualFile> files, + static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::multimap<u64, VirtualFile> files, std::string name); std::string GetName() const override; @@ -40,7 +40,7 @@ public: private: // Maps starting offset to file -- more efficient. - std::map<u64, VirtualFile> files; + std::multimap<u64, VirtualFile> files; std::string name; }; diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp index c629d9fa1..efc1030c1 100644 --- a/src/core/hardware_interrupt_manager.cpp +++ b/src/core/hardware_interrupt_manager.cpp @@ -11,19 +11,20 @@ namespace Core::Hardware { InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) { - gpu_interrupt_event = Core::Timing::CreateEvent("GPUInterrupt", [this](u64 message, s64) { - auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv"); - const u32 syncpt = static_cast<u32>(message >> 32); - const u32 value = static_cast<u32>(message); - nvdrv->SignalGPUInterruptSyncpt(syncpt, value); - }); + gpu_interrupt_event = + Core::Timing::CreateEvent("GPUInterrupt", [this](u64 message, std::chrono::nanoseconds) { + auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv"); + const u32 syncpt = static_cast<u32>(message >> 32); + const u32 value = static_cast<u32>(message); + nvdrv->SignalGPUInterruptSyncpt(syncpt, value); + }); } InterruptManager::~InterruptManager() = default; void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) { const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value; - system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg); + system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{10}, gpu_interrupt_event, msg); } } // namespace Core::Hardware diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index e1c7a0f3b..8dd4a2637 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -145,16 +145,18 @@ struct KernelCore::Impl { void InitializePreemption(KernelCore& kernel) { preemption_event = Core::Timing::CreateEvent( - "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) { + "PreemptionCallback", [this, &kernel](u64, std::chrono::nanoseconds) { { SchedulerLock lock(kernel); global_scheduler.PreemptThreads(); } - s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + const auto time_interval = std::chrono::nanoseconds{ + Core::Timing::msToCycles(std::chrono::milliseconds(10))}; system.CoreTiming().ScheduleEvent(time_interval, preemption_event); }); - s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + const auto time_interval = + std::chrono::nanoseconds{Core::Timing::msToCycles(std::chrono::milliseconds(10))}; system.CoreTiming().ScheduleEvent(time_interval, preemption_event); } diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 7b23a6889..af22f4c33 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kern std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; session->request_event = Core::Timing::CreateEvent( - name, [session](u64 userdata, s64 cycles_late) { session->CompleteSyncRequest(); }); + name, [session](u64, std::chrono::nanoseconds) { session->CompleteSyncRequest(); }); session->name = std::move(name); session->parent = std::move(parent); @@ -184,8 +184,8 @@ ResultCode ServerSession::CompleteSyncRequest() { ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory) { - ResultCode result = QueueSyncRequest(std::move(thread), memory); - const u64 delay = kernel.IsMulticore() ? 0U : 20000U; + const ResultCode result = QueueSyncRequest(std::move(thread), memory); + const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 0 : 20000}; Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {}); return result; } diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index 941305e8e..88b01b751 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -16,7 +16,7 @@ namespace Kernel { TimeManager::TimeManager(Core::System& system_) : system{system_} { time_manager_event_type = Core::Timing::CreateEvent( - "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { + "Kernel::TimeManagerCallback", [this](u64 thread_handle, std::chrono::nanoseconds) { SchedulerLock lock(system.Kernel()); Handle proper_handle = static_cast<Handle>(thread_handle); if (cancelled_events[proper_handle]) { @@ -34,7 +34,8 @@ void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 ASSERT(timetask); ASSERT(timetask->GetStatus() != ThreadStatus::Ready); ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex); - system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle); + system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{nanoseconds}, + time_manager_event_type, event_handle); } else { event_handle = InvalidHandle; } diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index cadc03805..c66124998 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -55,6 +55,10 @@ std::string VfsDirectoryServiceWrapper::GetName() const { ResultCode VfsDirectoryServiceWrapper::CreateFile(const std::string& path_, u64 size) const { std::string path(FileUtil::SanitizePath(path_)); auto dir = GetDirectoryRelativeWrapped(backing, FileUtil::GetParentPath(path)); + // dir can be nullptr if path contains subdirectories, create those prior to creating the file. + if (dir == nullptr) { + dir = backing->CreateSubdirectory(FileUtil::GetParentPath(path)); + } auto file = dir->CreateFile(FileUtil::GetFilename(path)); if (file == nullptr) { // TODO(DarkLordZach): Find a better error code for this diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index e9020e0dc..680290cbd 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -39,9 +39,10 @@ namespace Service::HID { // Updating period for each HID device. // TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66); -[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100); -[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100); +constexpr auto pad_update_ns = std::chrono::nanoseconds{1000000000 / 66}; +[[maybe_unused]] constexpr auto accelerometer_update_ns = + std::chrono::nanoseconds{1000000000 / 100}; +[[maybe_unused]] constexpr auto gyroscope_update_ticks = std::chrono::nanoseconds{1000000000 / 100}; constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000; IAppletResource::IAppletResource(Core::System& system) @@ -75,14 +76,14 @@ IAppletResource::IAppletResource(Core::System& system) GetController<Controller_Stubbed>(HidController::Unknown3).SetCommonHeaderOffset(0x5000); // Register update callbacks - pad_update_event = - Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) { + pad_update_event = Core::Timing::CreateEvent( + "HID::UpdatePadCallback", [this](u64 userdata, std::chrono::nanoseconds ns_late) { UpdateControllers(userdata, ns_late); }); // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) - system.CoreTiming().ScheduleEvent(pad_update_ticks, pad_update_event); + system.CoreTiming().ScheduleEvent(pad_update_ns, pad_update_event); ReloadInputDevices(); } @@ -107,7 +108,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) { rb.PushCopyObjects(shared_mem); } -void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) { +void IAppletResource::UpdateControllers(u64 userdata, std::chrono::nanoseconds ns_late) { auto& core_timing = system.CoreTiming(); const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); @@ -118,7 +119,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) { controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE); } - core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event); + core_timing.ScheduleEvent(pad_update_ns - ns_late, pad_update_event); } class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 6fb048360..c6f0a2584 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -4,10 +4,9 @@ #pragma once -#include "core/hle/service/hid/controllers/controller_base.h" -#include "core/hle/service/service.h" +#include <chrono> -#include "controllers/controller_base.h" +#include "core/hle/service/hid/controllers/controller_base.h" #include "core/hle/service/service.h" namespace Core::Timing { @@ -65,7 +64,7 @@ private: } void GetSharedMemoryHandle(Kernel::HLERequestContext& ctx); - void UpdateControllers(u64 userdata, s64 cycles_late); + void UpdateControllers(u64 userdata, std::chrono::nanoseconds ns_late); std::shared_ptr<Kernel::SharedMemory> shared_mem; diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index 4a1d1182e..4730070cb 100644 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -47,66 +47,67 @@ std::array<T, DestArraySize> ResizeArray(const std::array<T, SourceArraySize>& i MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) { MiiStoreBitFields bf; std::memcpy(&bf, data.data.data.data(), sizeof(MiiStoreBitFields)); - MiiInfo info{}; - info.name = ResizeArray<char16_t, 10, 11>(data.data.name); - info.uuid = data.data.uuid; - info.font_region = static_cast<u8>(bf.font_region.Value()); - info.favorite_color = static_cast<u8>(bf.favorite_color.Value()); - info.gender = static_cast<u8>(bf.gender.Value()); - info.height = static_cast<u8>(bf.height.Value()); - info.build = static_cast<u8>(bf.build.Value()); - info.type = static_cast<u8>(bf.type.Value()); - info.region_move = static_cast<u8>(bf.region_move.Value()); - info.faceline_type = static_cast<u8>(bf.faceline_type.Value()); - info.faceline_color = static_cast<u8>(bf.faceline_color.Value()); - info.faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()); - info.faceline_make = static_cast<u8>(bf.faceline_makeup.Value()); - info.hair_type = static_cast<u8>(bf.hair_type.Value()); - info.hair_color = static_cast<u8>(bf.hair_color.Value()); - info.hair_flip = static_cast<u8>(bf.hair_flip.Value()); - info.eye_type = static_cast<u8>(bf.eye_type.Value()); - info.eye_color = static_cast<u8>(bf.eye_color.Value()); - info.eye_scale = static_cast<u8>(bf.eye_scale.Value()); - info.eye_aspect = static_cast<u8>(bf.eye_aspect.Value()); - info.eye_rotate = static_cast<u8>(bf.eye_rotate.Value()); - info.eye_x = static_cast<u8>(bf.eye_x.Value()); - info.eye_y = static_cast<u8>(bf.eye_y.Value()); - info.eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()); - info.eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()); - info.eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()); - info.eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()); - info.eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()); - info.eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()); - info.eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3); - info.nose_type = static_cast<u8>(bf.nose_type.Value()); - info.nose_scale = static_cast<u8>(bf.nose_scale.Value()); - info.nose_y = static_cast<u8>(bf.nose_y.Value()); - info.mouth_type = static_cast<u8>(bf.mouth_type.Value()); - info.mouth_color = static_cast<u8>(bf.mouth_color.Value()); - info.mouth_scale = static_cast<u8>(bf.mouth_scale.Value()); - info.mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()); - info.mouth_y = static_cast<u8>(bf.mouth_y.Value()); - info.beard_color = static_cast<u8>(bf.beard_color.Value()); - info.beard_type = static_cast<u8>(bf.beard_type.Value()); - info.mustache_type = static_cast<u8>(bf.mustache_type.Value()); - info.mustache_scale = static_cast<u8>(bf.mustache_scale.Value()); - info.mustache_y = static_cast<u8>(bf.mustache_y.Value()); - info.glasses_type = static_cast<u8>(bf.glasses_type.Value()); - info.glasses_color = static_cast<u8>(bf.glasses_color.Value()); - info.glasses_scale = static_cast<u8>(bf.glasses_scale.Value()); - info.glasses_y = static_cast<u8>(bf.glasses_y.Value()); - info.mole_type = static_cast<u8>(bf.mole_type.Value()); - info.mole_scale = static_cast<u8>(bf.mole_scale.Value()); - info.mole_x = static_cast<u8>(bf.mole_x.Value()); - info.mole_y = static_cast<u8>(bf.mole_y.Value()); - return info; + + return { + .uuid = data.data.uuid, + .name = ResizeArray<char16_t, 10, 11>(data.data.name), + .font_region = static_cast<u8>(bf.font_region.Value()), + .favorite_color = static_cast<u8>(bf.favorite_color.Value()), + .gender = static_cast<u8>(bf.gender.Value()), + .height = static_cast<u8>(bf.height.Value()), + .build = static_cast<u8>(bf.build.Value()), + .type = static_cast<u8>(bf.type.Value()), + .region_move = static_cast<u8>(bf.region_move.Value()), + .faceline_type = static_cast<u8>(bf.faceline_type.Value()), + .faceline_color = static_cast<u8>(bf.faceline_color.Value()), + .faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()), + .faceline_make = static_cast<u8>(bf.faceline_makeup.Value()), + .hair_type = static_cast<u8>(bf.hair_type.Value()), + .hair_color = static_cast<u8>(bf.hair_color.Value()), + .hair_flip = static_cast<u8>(bf.hair_flip.Value()), + .eye_type = static_cast<u8>(bf.eye_type.Value()), + .eye_color = static_cast<u8>(bf.eye_color.Value()), + .eye_scale = static_cast<u8>(bf.eye_scale.Value()), + .eye_aspect = static_cast<u8>(bf.eye_aspect.Value()), + .eye_rotate = static_cast<u8>(bf.eye_rotate.Value()), + .eye_x = static_cast<u8>(bf.eye_x.Value()), + .eye_y = static_cast<u8>(bf.eye_y.Value()), + .eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()), + .eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()), + .eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()), + .eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()), + .eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()), + .eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()), + .eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3), + .nose_type = static_cast<u8>(bf.nose_type.Value()), + .nose_scale = static_cast<u8>(bf.nose_scale.Value()), + .nose_y = static_cast<u8>(bf.nose_y.Value()), + .mouth_type = static_cast<u8>(bf.mouth_type.Value()), + .mouth_color = static_cast<u8>(bf.mouth_color.Value()), + .mouth_scale = static_cast<u8>(bf.mouth_scale.Value()), + .mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()), + .mouth_y = static_cast<u8>(bf.mouth_y.Value()), + .beard_color = static_cast<u8>(bf.beard_color.Value()), + .beard_type = static_cast<u8>(bf.beard_type.Value()), + .mustache_type = static_cast<u8>(bf.mustache_type.Value()), + .mustache_scale = static_cast<u8>(bf.mustache_scale.Value()), + .mustache_y = static_cast<u8>(bf.mustache_y.Value()), + .glasses_type = static_cast<u8>(bf.glasses_type.Value()), + .glasses_color = static_cast<u8>(bf.glasses_color.Value()), + .glasses_scale = static_cast<u8>(bf.glasses_scale.Value()), + .glasses_y = static_cast<u8>(bf.glasses_y.Value()), + .mole_type = static_cast<u8>(bf.mole_type.Value()), + .mole_scale = static_cast<u8>(bf.mole_scale.Value()), + .mole_x = static_cast<u8>(bf.mole_x.Value()), + .mole_y = static_cast<u8>(bf.mole_y.Value()), + }; } u16 GenerateCrc16(const void* data, std::size_t size) { s32 crc{}; - for (int i = 0; i < size; i++) { - crc ^= reinterpret_cast<const u8*>(data)[i] << 8; - for (int j = 0; j < 8; j++) { + for (std::size_t i = 0; i < size; i++) { + crc ^= static_cast<const u8*>(data)[i] << 8; + for (std::size_t j = 0; j < 8; j++) { crc <<= 1; if ((crc & 0x10000) != 0) { crc = (crc ^ 0x1021) & 0xFFFF; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2f44d3779..789856118 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -28,8 +28,7 @@ namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60); -constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30); +constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60}; void NVFlinger::VSyncThread(NVFlinger& nv_flinger) { nv_flinger.SplitVSync(); @@ -67,20 +66,24 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { guard = std::make_shared<std::mutex>(); // Schedule the screen composition events - composition_event = - Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) { + composition_event = Core::Timing::CreateEvent( + "ScreenComposition", [this](u64, std::chrono::nanoseconds ns_late) { Lock(); Compose(); - const auto ticks = GetNextTicks(); - this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late), - composition_event); + + const auto ticks = std::chrono::nanoseconds{GetNextTicks()}; + const auto ticks_delta = ticks - ns_late; + const auto future_ns = std::max(std::chrono::nanoseconds::zero(), ticks_delta); + + this->system.CoreTiming().ScheduleEvent(future_ns, composition_event); }); + if (system.IsMulticore()) { is_running = true; wait_event = std::make_unique<Common::Event>(); vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this)); } else { - system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); + system.CoreTiming().ScheduleEvent(frame_ns, composition_event); } } diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index 53d27859b..ced41b1fe 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -20,7 +20,7 @@ namespace Core::Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12); +constexpr auto CHEAT_ENGINE_NS = std::chrono::nanoseconds{1000000000 / 12}; constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) @@ -188,10 +188,12 @@ CheatEngine::~CheatEngine() { } void CheatEngine::Initialize() { - event = Core::Timing::CreateEvent( - "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id), - [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); - core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event); + event = Core::Timing::CreateEvent("CheatEngine::FrameCallback::" + + Common::HexToString(metadata.main_nso_build_id), + [this](u64 userdata, std::chrono::nanoseconds ns_late) { + FrameCallback(userdata, ns_late); + }); + core_timing.ScheduleEvent(CHEAT_ENGINE_NS, event); metadata.process_id = system.CurrentProcess()->GetProcessID(); metadata.title_id = system.CurrentProcess()->GetTitleID(); @@ -217,7 +219,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) { MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70)); -void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) { +void CheatEngine::FrameCallback(u64, std::chrono::nanoseconds ns_late) { if (is_pending_reload.exchange(false)) { vm.LoadProgram(cheats); } @@ -230,7 +232,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) { vm.Execute(metadata); - core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event); + core_timing.ScheduleEvent(CHEAT_ENGINE_NS - ns_late, event); } } // namespace Core::Memory diff --git a/src/core/memory/cheat_engine.h b/src/core/memory/cheat_engine.h index 2649423f8..d4068cf84 100644 --- a/src/core/memory/cheat_engine.h +++ b/src/core/memory/cheat_engine.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <chrono> #include <memory> #include <vector> #include "common/common_types.h" @@ -71,7 +72,7 @@ public: void Reload(std::vector<CheatEntry> cheats); private: - void FrameCallback(u64 userdata, s64 cycles_late); + void FrameCallback(u64 userdata, std::chrono::nanoseconds ns_late); DmntCheatVm vm; CheatProcessMetadata metadata; diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index fb9f36bfd..2e7da23fe 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp @@ -190,6 +190,15 @@ void DmntCheatVm::LogOpcode(const CheatVmOpcode& opcode) { callbacks->CommandLog( fmt::format("Act[{:02X}]: {:d}", i, save_restore_regmask->should_operate[i])); } + } else if (auto rw_static_reg = std::get_if<ReadWriteStaticRegisterOpcode>(&opcode.opcode)) { + callbacks->CommandLog("Opcode: Read/Write Static Register"); + if (rw_static_reg->static_idx < NumReadableStaticRegisters) { + callbacks->CommandLog("Op Type: ReadStaticRegister"); + } else { + callbacks->CommandLog("Op Type: WriteStaticRegister"); + } + callbacks->CommandLog(fmt::format("Reg Idx {:X}", rw_static_reg->idx)); + callbacks->CommandLog(fmt::format("Stc Idx {:X}", rw_static_reg->static_idx)); } else if (auto debug_log = std::get_if<DebugLogOpcode>(&opcode.opcode)) { callbacks->CommandLog("Opcode: Debug Log"); callbacks->CommandLog(fmt::format("Bit Width: {:X}", debug_log->bit_width)); @@ -544,6 +553,16 @@ bool DmntCheatVm::DecodeNextOpcode(CheatVmOpcode& out) { } opcode.opcode = save_restore_regmask; } break; + case CheatVmOpcodeType::ReadWriteStaticRegister: { + ReadWriteStaticRegisterOpcode rw_static_reg{}; + // C3000XXx + // C3 = opcode 0xC3. + // XX = static register index. + // x = register index. + rw_static_reg.static_idx = ((first_dword >> 4) & 0xFF); + rw_static_reg.idx = (first_dword & 0xF); + opcode.opcode = rw_static_reg; + } break; case CheatVmOpcodeType::DebugLog: { DebugLogOpcode debug_log{}; // FFFTIX## @@ -667,6 +686,7 @@ void DmntCheatVm::ResetState() { registers.fill(0); saved_values.fill(0); loop_tops.fill(0); + static_registers.fill(0); instruction_ptr = 0; condition_depth = 0; decode_success = true; @@ -1153,6 +1173,15 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { } } } + } else if (auto rw_static_reg = + std::get_if<ReadWriteStaticRegisterOpcode>(&cur_opcode.opcode)) { + if (rw_static_reg->static_idx < NumReadableStaticRegisters) { + // Load a register with a static register. + registers[rw_static_reg->idx] = static_registers[rw_static_reg->static_idx]; + } else { + // Store a register to a static register. + static_registers[rw_static_reg->static_idx] = registers[rw_static_reg->idx]; + } } else if (auto debug_log = std::get_if<DebugLogOpcode>(&cur_opcode.opcode)) { // Read value from memory. u64 log_value = 0; diff --git a/src/core/memory/dmnt_cheat_vm.h b/src/core/memory/dmnt_cheat_vm.h index 8351fd798..21b86b72c 100644 --- a/src/core/memory/dmnt_cheat_vm.h +++ b/src/core/memory/dmnt_cheat_vm.h @@ -56,6 +56,7 @@ enum class CheatVmOpcodeType : u32 { BeginRegisterConditionalBlock = 0xC0, SaveRestoreRegister = 0xC1, SaveRestoreRegisterMask = 0xC2, + ReadWriteStaticRegister = 0xC3, // This is a meta entry, and not a real opcode. // This is to facilitate multi-nybble instruction decoding. @@ -237,6 +238,11 @@ struct SaveRestoreRegisterMaskOpcode { std::array<bool, 0x10> should_operate{}; }; +struct ReadWriteStaticRegisterOpcode { + u32 static_idx{}; + u32 idx{}; +}; + struct DebugLogOpcode { u32 bit_width{}; u32 log_id{}; @@ -259,7 +265,8 @@ struct CheatVmOpcode { PerformArithmeticStaticOpcode, BeginKeypressConditionalOpcode, PerformArithmeticRegisterOpcode, StoreRegisterToAddressOpcode, BeginRegisterConditionalOpcode, SaveRestoreRegisterOpcode, - SaveRestoreRegisterMaskOpcode, DebugLogOpcode, UnrecognizedInstruction> + SaveRestoreRegisterMaskOpcode, ReadWriteStaticRegisterOpcode, DebugLogOpcode, + UnrecognizedInstruction> opcode{}; }; @@ -281,6 +288,10 @@ public: static constexpr std::size_t MaximumProgramOpcodeCount = 0x400; static constexpr std::size_t NumRegisters = 0x10; + static constexpr std::size_t NumReadableStaticRegisters = 0x80; + static constexpr std::size_t NumWritableStaticRegisters = 0x80; + static constexpr std::size_t NumStaticRegisters = + NumReadableStaticRegisters + NumWritableStaticRegisters; explicit DmntCheatVm(std::unique_ptr<Callbacks> callbacks); ~DmntCheatVm(); @@ -302,6 +313,7 @@ private: std::array<u32, MaximumProgramOpcodeCount> program{}; std::array<u64, NumRegisters> registers{}; std::array<u64, NumRegisters> saved_values{}; + std::array<u64, NumStaticRegisters> static_registers{}; std::array<std::size_t, NumRegisters> loop_tops{}; bool DecodeNextOpcode(CheatVmOpcode& out); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index e8a6f2a6e..44252dd81 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -115,6 +115,7 @@ void LogSettings() { values.use_asynchronous_gpu_emulation.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -170,6 +171,7 @@ void RestoreGlobalState() { values.use_asynchronous_gpu_emulation.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); + values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.force_30fps_mode.SetGlobal(true); values.bg_red.SetGlobal(true); diff --git a/src/core/settings.h b/src/core/settings.h index a64debd25..386233fdf 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -434,6 +434,7 @@ struct Values { Setting<bool> use_asynchronous_gpu_emulation; Setting<bool> use_vsync; Setting<bool> use_assembly_shaders; + Setting<bool> use_asynchronous_shaders; Setting<bool> force_30fps_mode; Setting<bool> use_fast_gpu_time; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 78915e6db..5a30c75da 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -207,6 +207,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_UseAsynchronousShaders", + Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index 8b0c50d11..27b894b51 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -14,7 +14,7 @@ namespace Tools { namespace { -constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60); +constexpr auto memory_freezer_ns = std::chrono::nanoseconds{1000000000 / 60}; u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) { switch (width) { @@ -55,10 +55,11 @@ void MemoryWriteWidth(Core::Memory::Memory& memory, u32 width, VAddr addr, u64 v Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_) : core_timing{core_timing_}, memory{memory_} { - event = Core::Timing::CreateEvent( - "MemoryFreezer::FrameCallback", - [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); }); - core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event); + event = Core::Timing::CreateEvent("MemoryFreezer::FrameCallback", + [this](u64 userdata, std::chrono::nanoseconds ns_late) { + FrameCallback(userdata, ns_late); + }); + core_timing.ScheduleEvent(memory_freezer_ns, event); } Freezer::~Freezer() { @@ -68,7 +69,7 @@ Freezer::~Freezer() { void Freezer::SetActive(bool active) { if (!this->active.exchange(active)) { FillEntryReads(); - core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event); + core_timing.ScheduleEvent(memory_freezer_ns, event); LOG_DEBUG(Common_Memory, "Memory freezer activated!"); } else { LOG_DEBUG(Common_Memory, "Memory freezer deactivated!"); @@ -158,7 +159,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const { return entries; } -void Freezer::FrameCallback(u64 userdata, s64 ns_late) { +void Freezer::FrameCallback(u64, std::chrono::nanoseconds ns_late) { if (!IsActive()) { LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events."); return; @@ -173,7 +174,7 @@ void Freezer::FrameCallback(u64 userdata, s64 ns_late) { MemoryWriteWidth(memory, entry.width, entry.address, entry.value); } - core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event); + core_timing.ScheduleEvent(memory_freezer_ns - ns_late, event); } void Freezer::FillEntryReads() { diff --git a/src/core/tools/freezer.h b/src/core/tools/freezer.h index 62fc6aa6c..8438783d5 100644 --- a/src/core/tools/freezer.h +++ b/src/core/tools/freezer.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <chrono> #include <memory> #include <mutex> #include <optional> @@ -72,7 +73,7 @@ public: std::vector<Entry> GetEntries() const; private: - void FrameCallback(u64 userdata, s64 cycles_late); + void FrameCallback(u64 userdata, std::chrono::nanoseconds ns_late); void FillEntryReads(); std::atomic_bool active{false}; diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index e66db1940..244463a47 100644 --- a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <chrono> #include <cstdlib> #include <memory> #include <string> @@ -17,7 +18,6 @@ namespace { // Numbers are chosen randomly to make sure the correct one is given. constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}}; -constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}}; std::array<s64, 5> delays{}; @@ -25,12 +25,12 @@ std::bitset<CB_IDS.size()> callbacks_ran_flags; u64 expected_callback = 0; template <unsigned int IDX> -void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) { +void HostCallbackTemplate(u64 userdata, std::chrono::nanoseconds ns_late) { static_assert(IDX < CB_IDS.size(), "IDX out of range"); callbacks_ran_flags.set(IDX); REQUIRE(CB_IDS[IDX] == userdata); REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); - delays[IDX] = nanoseconds_late; + delays[IDX] = ns_late.count(); ++expected_callback; } @@ -77,10 +77,12 @@ TEST_CASE("CoreTiming[BasicOrder]", "[core]") { core_timing.SyncPause(true); - u64 one_micro = 1000U; + const u64 one_micro = 1000U; for (std::size_t i = 0; i < events.size(); i++) { - u64 order = calls_order[i]; - core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + const u64 order = calls_order[i]; + const auto future_ns = std::chrono::nanoseconds{static_cast<s64>(i * one_micro + 100)}; + + core_timing.ScheduleEvent(future_ns, events[order], CB_IDS[order]); } /// test pause REQUIRE(callbacks_ran_flags.none()); @@ -116,13 +118,16 @@ TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") { expected_callback = 0; - u64 start = core_timing.GetGlobalTimeNs().count(); - u64 one_micro = 1000U; + const u64 start = core_timing.GetGlobalTimeNs().count(); + const u64 one_micro = 1000U; + for (std::size_t i = 0; i < events.size(); i++) { - u64 order = calls_order[i]; - core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); + const u64 order = calls_order[i]; + const auto future_ns = std::chrono::nanoseconds{static_cast<s64>(i * one_micro + 100)}; + core_timing.ScheduleEvent(future_ns, events[order], CB_IDS[order]); } - u64 end = core_timing.GetGlobalTimeNs().count(); + + const u64 end = core_timing.GetGlobalTimeNs().count(); const double scheduling_time = static_cast<double>(end - start); const double timer_time = static_cast<double>(TestTimerSpeed(core_timing)); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 21c46a567..3cd896a0f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(video_core STATIC sampler_cache.cpp sampler_cache.h shader_cache.h + shader_notify.cpp + shader_notify.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp @@ -128,6 +130,8 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h + shader/async_shaders.cpp + shader/async_shaders.h shader/compiler_settings.cpp shader/compiler_settings.h shader/control_flow.cpp diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp index 6c426b035..b06c32c84 100644 --- a/src/video_core/compatible_formats.cpp +++ b/src/video_core/compatible_formats.cpp @@ -17,101 +17,94 @@ namespace { // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt constexpr std::array VIEW_CLASS_128_BITS = { - PixelFormat::RGBA32F, - PixelFormat::RGBA32UI, + PixelFormat::R32G32B32A32_FLOAT, + PixelFormat::R32G32B32A32_UINT, + PixelFormat::R32G32B32A32_SINT, }; -// Missing formats: -// PixelFormat::RGBA32I constexpr std::array VIEW_CLASS_96_BITS = { - PixelFormat::RGB32F, + PixelFormat::R32G32B32_FLOAT, }; // Missing formats: // PixelFormat::RGB32UI, // PixelFormat::RGB32I, constexpr std::array VIEW_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_UINT, PixelFormat::R16G16B16A16_SINT, }; -// Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I // TODO: How should we handle 48 bits? constexpr std::array VIEW_CLASS_32_BITS = { - PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F, - PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI, - PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U, - PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S, - PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8, - PixelFormat::BGRA8_SRGB, + PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, + PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, + PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, + PixelFormat::R16G16_UNORM, PixelFormat::A8B8G8R8_SNORM, PixelFormat::R16G16_SNORM, + PixelFormat::A8B8G8R8_SRGB, PixelFormat::E5B9G9R9_FLOAT, PixelFormat::B8G8R8A8_UNORM, + PixelFormat::B8G8R8A8_SRGB, PixelFormat::A8B8G8R8_UINT, PixelFormat::A8B8G8R8_SINT, + PixelFormat::A2B10G10R10_UINT, }; -// Missing formats: -// PixelFormat::RGBA8UI -// PixelFormat::RGBA8I -// PixelFormat::RGB10_A2_UI // TODO: How should we handle 24 bits? constexpr std::array VIEW_CLASS_16_BITS = { - PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, - PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S, + PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, + PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, + PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, }; -// Missing formats: -// PixelFormat::RG8I constexpr std::array VIEW_CLASS_8_BITS = { - PixelFormat::R8UI, - PixelFormat::R8U, + PixelFormat::R8_UINT, + PixelFormat::R8_UNORM, + PixelFormat::R8_SINT, + PixelFormat::R8_SNORM, }; -// Missing formats: -// PixelFormat::R8I -// PixelFormat::R8S constexpr std::array VIEW_CLASS_RGTC1_RED = { - PixelFormat::DXN1, + PixelFormat::BC4_UNORM, + PixelFormat::BC4_SNORM, }; -// Missing formats: -// COMPRESSED_SIGNED_RED_RGTC1 constexpr std::array VIEW_CLASS_RGTC2_RG = { - PixelFormat::DXN2UNORM, - PixelFormat::DXN2SNORM, + PixelFormat::BC5_UNORM, + PixelFormat::BC5_SNORM, }; constexpr std::array VIEW_CLASS_BPTC_UNORM = { - PixelFormat::BC7U, - PixelFormat::BC7U_SRGB, + PixelFormat::BC7_UNORM, + PixelFormat::BC7_SRGB, }; constexpr std::array VIEW_CLASS_BPTC_FLOAT = { - PixelFormat::BC6H_SF16, - PixelFormat::BC6H_UF16, + PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Compatibility table taken from Table 4.X.1 in: // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt constexpr std::array COPY_CLASS_128_BITS = { - PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23, - PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB, - PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB, - PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16, + PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, + PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, + PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, + PixelFormat::BC7_UNORM, PixelFormat::BC7_SRGB, PixelFormat::BC6H_SFLOAT, + PixelFormat::BC6H_UFLOAT, }; // Missing formats: // PixelFormat::RGBA32I // COMPRESSED_RG_RGTC2 constexpr std::array COPY_CLASS_64_BITS = { - PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI, - PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, - + PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, + PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, + PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, + PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_SINT, + PixelFormat::BC1_RGBA_UNORM, PixelFormat::BC1_RGBA_SRGB, }; // Missing formats: -// PixelFormat::RGBA16I -// PixelFormat::RG32I, // COMPRESSED_RGB_S3TC_DXT1_EXT // COMPRESSED_SRGB_S3TC_DXT1_EXT // COMPRESSED_RGBA_S3TC_DXT1_EXT diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 758bfe148..8e19c3373 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -20,6 +20,7 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/shader_notify.h" #include "video_core/video_core.h" namespace Tegra { @@ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); + shader_notify = std::make_unique<VideoCore::ShaderNotify>(); } GPU::~GPU() = default; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2c42483bd..19a34c402 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -33,59 +33,68 @@ class System; namespace VideoCore { class RendererBase; +class ShaderNotify; } // namespace VideoCore namespace Tegra { enum class RenderTargetFormat : u32 { NONE = 0x0, - RGBA32_FLOAT = 0xC0, - RGBA32_UINT = 0xC2, - RGBA16_UNORM = 0xC6, - RGBA16_SNORM = 0xC7, - RGBA16_UINT = 0xC9, - RGBA16_FLOAT = 0xCA, - RG32_FLOAT = 0xCB, - RG32_UINT = 0xCD, - RGBX16_FLOAT = 0xCE, - BGRA8_UNORM = 0xCF, - BGRA8_SRGB = 0xD0, - RGB10_A2_UNORM = 0xD1, - RGBA8_UNORM = 0xD5, - RGBA8_SRGB = 0xD6, - RGBA8_SNORM = 0xD7, - RGBA8_UINT = 0xD9, - RG16_UNORM = 0xDA, - RG16_SNORM = 0xDB, - RG16_SINT = 0xDC, - RG16_UINT = 0xDD, - RG16_FLOAT = 0xDE, - R11G11B10_FLOAT = 0xE0, + R32B32G32A32_FLOAT = 0xC0, + R32G32B32A32_SINT = 0xC1, + R32G32B32A32_UINT = 0xC2, + R16G16B16A16_UNORM = 0xC6, + R16G16B16A16_SNORM = 0xC7, + R16G16B16A16_SINT = 0xC8, + R16G16B16A16_UINT = 0xC9, + R16G16B16A16_FLOAT = 0xCA, + R32G32_FLOAT = 0xCB, + R32G32_SINT = 0xCC, + R32G32_UINT = 0xCD, + R16G16B16X16_FLOAT = 0xCE, + B8G8R8A8_UNORM = 0xCF, + B8G8R8A8_SRGB = 0xD0, + A2B10G10R10_UNORM = 0xD1, + A2B10G10R10_UINT = 0xD2, + A8B8G8R8_UNORM = 0xD5, + A8B8G8R8_SRGB = 0xD6, + A8B8G8R8_SNORM = 0xD7, + A8B8G8R8_SINT = 0xD8, + A8B8G8R8_UINT = 0xD9, + R16G16_UNORM = 0xDA, + R16G16_SNORM = 0xDB, + R16G16_SINT = 0xDC, + R16G16_UINT = 0xDD, + R16G16_FLOAT = 0xDE, + B10G11R11_FLOAT = 0xE0, R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, - B5G6R5_UNORM = 0xE8, - BGR5A1_UNORM = 0xE9, - RG8_UNORM = 0xEA, - RG8_SNORM = 0xEB, - RG8_UINT = 0xED, + R5G6B5_UNORM = 0xE8, + A1R5G5B5_UNORM = 0xE9, + R8G8_UNORM = 0xEA, + R8G8_SNORM = 0xEB, + R8G8_SINT = 0xEC, + R8G8_UINT = 0xED, R16_UNORM = 0xEE, R16_SNORM = 0xEF, R16_SINT = 0xF0, R16_UINT = 0xF1, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, + R8_SNORM = 0xF4, + R8_SINT = 0xF5, R8_UINT = 0xF6, }; enum class DepthFormat : u32 { - Z32_FLOAT = 0xA, - Z16_UNORM = 0x13, - S8_Z24_UNORM = 0x14, - Z24_X8_UNORM = 0x15, - Z24_S8_UNORM = 0x16, - Z24_C8_UNORM = 0x18, - Z32_S8_X24_FLOAT = 0x19, + D32_FLOAT = 0xA, + D16_UNORM = 0x13, + S8_UINT_Z24_UNORM = 0x14, + D24X8_UNORM = 0x15, + D24S8_UNORM = 0x16, + D24C8_UNORM = 0x18, + D32_FLOAT_S8X24_UINT = 0x19, }; struct CommandListHeader; @@ -96,9 +105,9 @@ class DebugContext; */ struct FramebufferConfig { enum class PixelFormat : u32 { - ABGR8 = 1, - RGB565 = 4, - BGRA8 = 5, + A8B8G8R8_UNORM = 1, + RGB565_UNORM = 4, + B8G8R8A8_UNORM = 5, }; VAddr address; @@ -207,6 +216,14 @@ public: return *renderer; } + VideoCore::ShaderNotify& ShaderNotify() { + return *shader_notify; + } + + const VideoCore::ShaderNotify& ShaderNotify() const { + return *shader_notify; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -347,6 +364,8 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; + /// Shader build notifier + std::unique_ptr<VideoCore::ShaderNotify> shader_notify; std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 836b25c1d..9da9fb4ff 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -41,146 +41,168 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth } static constexpr ConversionArray morton_to_linear_fns = { - MortonCopy<true, PixelFormat::ABGR8U>, - MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::ABGR8UI>, - MortonCopy<true, PixelFormat::B5G6R5U>, - MortonCopy<true, PixelFormat::A2B10G10R10U>, - MortonCopy<true, PixelFormat::A1B5G5R5U>, - MortonCopy<true, PixelFormat::R8U>, - MortonCopy<true, PixelFormat::R8UI>, - MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::RGBA16U>, - MortonCopy<true, PixelFormat::RGBA16S>, - MortonCopy<true, PixelFormat::RGBA16UI>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, - MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, - MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, - MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, - MortonCopy<true, PixelFormat::BC6H_UF16>, - MortonCopy<true, PixelFormat::BC6H_SF16>, - MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, - MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, - MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16U>, - MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, - MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::RGBA8_SRGB>, - MortonCopy<true, PixelFormat::RG8U>, - MortonCopy<true, PixelFormat::RG8S>, - MortonCopy<true, PixelFormat::RG8UI>, - MortonCopy<true, PixelFormat::RG32UI>, - MortonCopy<true, PixelFormat::RGBX16F>, - MortonCopy<true, PixelFormat::R32UI>, - MortonCopy<true, PixelFormat::R32I>, - MortonCopy<true, PixelFormat::ASTC_2D_8X8>, - MortonCopy<true, PixelFormat::ASTC_2D_8X5>, - MortonCopy<true, PixelFormat::ASTC_2D_5X4>, - MortonCopy<true, PixelFormat::BGRA8_SRGB>, - MortonCopy<true, PixelFormat::DXT1_SRGB>, - MortonCopy<true, PixelFormat::DXT23_SRGB>, - MortonCopy<true, PixelFormat::DXT45_SRGB>, - MortonCopy<true, PixelFormat::BC7U_SRGB>, - MortonCopy<true, PixelFormat::R4G4B4A4U>, + MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<true, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<true, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<true, PixelFormat::R5G6B5_UNORM>, + MortonCopy<true, PixelFormat::B5G6R5_UNORM>, + MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<true, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<true, PixelFormat::R8_UNORM>, + MortonCopy<true, PixelFormat::R8_SNORM>, + MortonCopy<true, PixelFormat::R8_SINT>, + MortonCopy<true, PixelFormat::R8_UINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<true, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<true, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<true, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<true, PixelFormat::BC2_UNORM>, + MortonCopy<true, PixelFormat::BC3_UNORM>, + MortonCopy<true, PixelFormat::BC4_UNORM>, + MortonCopy<true, PixelFormat::BC4_SNORM>, + MortonCopy<true, PixelFormat::BC5_UNORM>, + MortonCopy<true, PixelFormat::BC5_SNORM>, + MortonCopy<true, PixelFormat::BC7_UNORM>, + MortonCopy<true, PixelFormat::BC6H_UFLOAT>, + MortonCopy<true, PixelFormat::BC6H_SFLOAT>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<true, PixelFormat::R32G32_FLOAT>, + MortonCopy<true, PixelFormat::R32G32_SINT>, + MortonCopy<true, PixelFormat::R32_FLOAT>, + MortonCopy<true, PixelFormat::R16_FLOAT>, + MortonCopy<true, PixelFormat::R16_UNORM>, + MortonCopy<true, PixelFormat::R16_SNORM>, + MortonCopy<true, PixelFormat::R16_UINT>, + MortonCopy<true, PixelFormat::R16_SINT>, + MortonCopy<true, PixelFormat::R16G16_UNORM>, + MortonCopy<true, PixelFormat::R16G16_FLOAT>, + MortonCopy<true, PixelFormat::R16G16_UINT>, + MortonCopy<true, PixelFormat::R16G16_SINT>, + MortonCopy<true, PixelFormat::R16G16_SNORM>, + MortonCopy<true, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<true, PixelFormat::R8G8_UNORM>, + MortonCopy<true, PixelFormat::R8G8_SNORM>, + MortonCopy<true, PixelFormat::R8G8_SINT>, + MortonCopy<true, PixelFormat::R8G8_UINT>, + MortonCopy<true, PixelFormat::R32G32_UINT>, + MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<true, PixelFormat::R32_UINT>, + MortonCopy<true, PixelFormat::R32_SINT>, + MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>, + MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>, + MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<true, PixelFormat::BC2_SRGB>, + MortonCopy<true, PixelFormat::BC3_SRGB>, + MortonCopy<true, PixelFormat::BC7_SRGB>, + MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_5X5>, + MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X8>, + MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X6>, + MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_10X10>, + MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_12X12>, + MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_8X6>, + MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, - MortonCopy<true, PixelFormat::ASTC_2D_6X5>, + MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>, MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, - MortonCopy<true, PixelFormat::E5B9G9R9F>, - MortonCopy<true, PixelFormat::Z32F>, - MortonCopy<true, PixelFormat::Z16>, - MortonCopy<true, PixelFormat::Z24S8>, - MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32FS8>, + MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<true, PixelFormat::D32_FLOAT>, + MortonCopy<true, PixelFormat::D16_UNORM>, + MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>, }; static constexpr ConversionArray linear_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8U>, - MortonCopy<false, PixelFormat::ABGR8S>, - MortonCopy<false, PixelFormat::ABGR8UI>, - MortonCopy<false, PixelFormat::B5G6R5U>, - MortonCopy<false, PixelFormat::A2B10G10R10U>, - MortonCopy<false, PixelFormat::A1B5G5R5U>, - MortonCopy<false, PixelFormat::R8U>, - MortonCopy<false, PixelFormat::R8UI>, - MortonCopy<false, PixelFormat::RGBA16F>, - MortonCopy<false, PixelFormat::RGBA16S>, - MortonCopy<false, PixelFormat::RGBA16U>, - MortonCopy<false, PixelFormat::RGBA16UI>, - MortonCopy<false, PixelFormat::R11FG11FB10F>, - MortonCopy<false, PixelFormat::RGBA32UI>, - MortonCopy<false, PixelFormat::DXT1>, - MortonCopy<false, PixelFormat::DXT23>, - MortonCopy<false, PixelFormat::DXT45>, - MortonCopy<false, PixelFormat::DXN1>, - MortonCopy<false, PixelFormat::DXN2UNORM>, - MortonCopy<false, PixelFormat::DXN2SNORM>, - MortonCopy<false, PixelFormat::BC7U>, - MortonCopy<false, PixelFormat::BC6H_UF16>, - MortonCopy<false, PixelFormat::BC6H_SF16>, + MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>, + MortonCopy<false, PixelFormat::A8B8G8R8_SINT>, + MortonCopy<false, PixelFormat::A8B8G8R8_UINT>, + MortonCopy<false, PixelFormat::R5G6B5_UNORM>, + MortonCopy<false, PixelFormat::B5G6R5_UNORM>, + MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>, + MortonCopy<false, PixelFormat::A2B10G10R10_UINT>, + MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>, + MortonCopy<false, PixelFormat::R8_UNORM>, + MortonCopy<false, PixelFormat::R8_SNORM>, + MortonCopy<false, PixelFormat::R8_SINT>, + MortonCopy<false, PixelFormat::R8_UINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_SINT>, + MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>, + MortonCopy<false, PixelFormat::R16G16B16A16_UINT>, + MortonCopy<false, PixelFormat::B10G11R11_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_UINT>, + MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>, + MortonCopy<false, PixelFormat::BC2_UNORM>, + MortonCopy<false, PixelFormat::BC3_UNORM>, + MortonCopy<false, PixelFormat::BC4_UNORM>, + MortonCopy<false, PixelFormat::BC4_SNORM>, + MortonCopy<false, PixelFormat::BC5_UNORM>, + MortonCopy<false, PixelFormat::BC5_SNORM>, + MortonCopy<false, PixelFormat::BC7_UNORM>, + MortonCopy<false, PixelFormat::BC6H_UFLOAT>, + MortonCopy<false, PixelFormat::BC6H_SFLOAT>, // TODO(Subv): Swizzling ASTC formats are not supported nullptr, - MortonCopy<false, PixelFormat::BGRA8>, - MortonCopy<false, PixelFormat::RGBA32F>, - MortonCopy<false, PixelFormat::RG32F>, - MortonCopy<false, PixelFormat::R32F>, - MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16U>, - MortonCopy<false, PixelFormat::R16S>, - MortonCopy<false, PixelFormat::R16UI>, - MortonCopy<false, PixelFormat::R16I>, - MortonCopy<false, PixelFormat::RG16>, - MortonCopy<false, PixelFormat::RG16F>, - MortonCopy<false, PixelFormat::RG16UI>, - MortonCopy<false, PixelFormat::RG16I>, - MortonCopy<false, PixelFormat::RG16S>, - MortonCopy<false, PixelFormat::RGB32F>, - MortonCopy<false, PixelFormat::RGBA8_SRGB>, - MortonCopy<false, PixelFormat::RG8U>, - MortonCopy<false, PixelFormat::RG8S>, - MortonCopy<false, PixelFormat::RG8UI>, - MortonCopy<false, PixelFormat::RG32UI>, - MortonCopy<false, PixelFormat::RGBX16F>, - MortonCopy<false, PixelFormat::R32UI>, - MortonCopy<false, PixelFormat::R32I>, + MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>, + MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32B32A32_SINT>, + MortonCopy<false, PixelFormat::R32G32_FLOAT>, + MortonCopy<false, PixelFormat::R32G32_SINT>, + MortonCopy<false, PixelFormat::R32_FLOAT>, + MortonCopy<false, PixelFormat::R16_FLOAT>, + MortonCopy<false, PixelFormat::R16_UNORM>, + MortonCopy<false, PixelFormat::R16_SNORM>, + MortonCopy<false, PixelFormat::R16_UINT>, + MortonCopy<false, PixelFormat::R16_SINT>, + MortonCopy<false, PixelFormat::R16G16_UNORM>, + MortonCopy<false, PixelFormat::R16G16_FLOAT>, + MortonCopy<false, PixelFormat::R16G16_UINT>, + MortonCopy<false, PixelFormat::R16G16_SINT>, + MortonCopy<false, PixelFormat::R16G16_SNORM>, + MortonCopy<false, PixelFormat::R32G32B32_FLOAT>, + MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>, + MortonCopy<false, PixelFormat::R8G8_UNORM>, + MortonCopy<false, PixelFormat::R8G8_SNORM>, + MortonCopy<false, PixelFormat::R8G8_SINT>, + MortonCopy<false, PixelFormat::R8G8_UINT>, + MortonCopy<false, PixelFormat::R32G32_UINT>, + MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>, + MortonCopy<false, PixelFormat::R32_UINT>, + MortonCopy<false, PixelFormat::R32_SINT>, nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::BGRA8_SRGB>, - MortonCopy<false, PixelFormat::DXT1_SRGB>, - MortonCopy<false, PixelFormat::DXT23_SRGB>, - MortonCopy<false, PixelFormat::DXT45_SRGB>, - MortonCopy<false, PixelFormat::BC7U_SRGB>, - MortonCopy<false, PixelFormat::R4G4B4A4U>, + MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>, + MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>, + MortonCopy<false, PixelFormat::BC2_SRGB>, + MortonCopy<false, PixelFormat::BC3_SRGB>, + MortonCopy<false, PixelFormat::BC7_SRGB>, + MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>, nullptr, nullptr, nullptr, @@ -199,12 +221,12 @@ static constexpr ConversionArray linear_to_morton_fns = { nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::E5B9G9R9F>, - MortonCopy<false, PixelFormat::Z32F>, - MortonCopy<false, PixelFormat::Z16>, - MortonCopy<false, PixelFormat::Z24S8>, - MortonCopy<false, PixelFormat::S8Z24>, - MortonCopy<false, PixelFormat::Z32FS8>, + MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>, + MortonCopy<false, PixelFormat::D32_FLOAT>, + MortonCopy<false, PixelFormat::D16_UNORM>, + MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>, + MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>, + MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>, }; static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..4489abf61 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) { return type; } -std::string GlobalMemoryName(const GlobalMemoryBase& base) { - return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); -} - class ARBDecompiler final { public: explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, @@ -199,6 +195,8 @@ public: } private: + void DefineGlobalMemory(); + void DeclareHeader(); void DeclareVertex(); void DeclareGeometry(); @@ -228,6 +226,7 @@ private: std::pair<std::string, std::size_t> BuildCoords(Operation); std::string BuildAoffi(Operation); + std::string GlobalMemoryPointer(const GmemNode& gmem); void Exit(); std::string Assign(Operation); @@ -378,10 +377,8 @@ private: std::string address; std::string_view opname; if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); - opname = "ATOMB"; + address = GlobalMemoryPointer(*gmem); + opname = "ATOM"; } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); opname = "ATOMS"; @@ -456,9 +453,13 @@ private: shader_source += '\n'; } - std::string AllocTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}.x", num_temporaries++); + std::string AllocLongVectorTemporary() { + max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); + return fmt::format("L{}", num_long_temporaries++); + } + + std::string AllocLongTemporary() { + return fmt::format("{}.x", AllocLongVectorTemporary()); } std::string AllocVectorTemporary() { @@ -466,8 +467,13 @@ private: return fmt::format("T{}", num_temporaries++); } + std::string AllocTemporary() { + return fmt::format("{}.x", AllocVectorTemporary()); + } + void ResetTemporaries() noexcept { num_temporaries = 0; + num_long_temporaries = 0; } const Device& device; @@ -478,6 +484,11 @@ private: std::size_t num_temporaries = 0; std::size_t max_temporaries = 0; + std::size_t num_long_temporaries = 0; + std::size_t max_long_temporaries = 0; + + std::map<GlobalMemoryBase, u32> global_memory_names; + std::string shader_source; static constexpr std::string_view ADD_F32 = "ADD.F32"; @@ -784,6 +795,8 @@ private: ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier) : device{device}, ir{ir}, registry{registry}, stage{stage} { + DefineGlobalMemory(); + AddLine("TEMP RC;"); AddLine("TEMP FSWZA[4];"); AddLine("TEMP FSWZB[4];"); @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) { } } +void ARBDecompiler::DefineGlobalMemory() { + u32 binding = 0; + for (const auto& pair : ir.GetGlobalMemory()) { + const GlobalMemoryBase base = pair.first; + global_memory_names.emplace(base, binding); + ++binding; + } +} + void ARBDecompiler::DeclareHeader() { AddLine("!!NV{}5.0", HeaderStageName(stage)); // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D AddLine("OPTION NV_internal;"); AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_storage_buffer;"); AddLine("OPTION NV_shader_thread_group;"); if (ir.UsesWarps() && device.HasWarpIntrinsics()) { AddLine("OPTION NV_shader_thread_shuffle;"); @@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() { } void ARBDecompiler::DeclareGlobalMemory() { - u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& pair : ir.GetGlobalMemory()) { - const auto& base = pair.first; - AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); - ++binding; + const std::size_t num_entries = ir.GetGlobalMemory().size(); + if (num_entries > 0) { + const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; + AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); } } @@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() { for (std::size_t i = 0; i < max_temporaries; ++i) { AddLine("TEMP T{};", i); } + for (std::size_t i = 0; i < max_long_temporaries; ++i) { + AddLine("LONG TEMP L{};", i); + } } void ARBDecompiler::DeclarePredicates() { @@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) { if (const auto gmem = std::get_if<GmemNode>(&*node)) { std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); return temporary; } @@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { return fmt::format(", offset({})", temporary); } +std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + const u32 binding = global_memory_names.at(gmem.GetDescriptor()); + const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; + + const std::string pointer = AllocLongVectorTemporary(); + std::string temporary = AllocTemporary(); + + const u32 local_index = binding / 2; + AddLine("PK64.U {}, c[{}];", pointer, local_index); + AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), + Visit(gmem.GetBaseAddress())); + AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); + AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); + return fmt::format("{}.x", pointer); +} + void ARBDecompiler::Exit() { if (stage != ShaderType::Fragment) { AddLine("RET;"); @@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) { ResetTemporaries(); return {}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const std::string temporary = AllocTemporary(); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), - Visit(gmem->GetBaseAddress())); - AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), - temporary); + AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); ResetTemporaries(); return {}; } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..e866d8f2f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..630acb73b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -233,6 +233,8 @@ Device::Device() GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..94d38d7d1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -104,6 +104,10 @@ public: return use_assembly_shaders; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -127,6 +131,7 @@ private: bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; + bool use_asynchronous_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..03e82c599 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } +void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { + if (num_entries == 0) { + return; + } + if (num_entries % 2 == 1) { + pointers[num_entries] = 0; + } + const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); + glProgramLocalParametersI4uivNV(target, 0, num_vectors, + reinterpret_cast<const GLuint*>(pointers)); +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -149,7 +161,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + async_shaders{emu_window} { CheckExtensions(); unified_uniform_buffer.Create(); @@ -162,6 +175,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind nullptr, 0); } } + + if (device.UseAsynchronousShaders()) { + // Max worker threads we should allow + constexpr auto MAX_THREADS = 2u; + // Amount of threads we should reserve for other parts of yuzu + constexpr auto RESERVED_THREADS = 6u; + // Get the amount of threads we can use(this can return zero) + const auto cpu_thread_count = + std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); + // Deduce how many "extra" threads we have to use. + const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1u, max_threads_unused); + // Don't use more than MAX_THREADS + const auto worker_count = std::min(max_worker_count, MAX_THREADS); + async_shaders.AllocateWorkers(worker_count); + } } RasterizerOpenGL::~RasterizerOpenGL() { @@ -306,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - std::size_t num_ssbos = 0; u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -329,31 +358,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } // Currently this stages are not supported in the OpenGL backend. - // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl) { - continue; - } else if (program == Maxwell::ShaderProgram::TesselationEval) { + // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL + if (program == Maxwell::ShaderProgram::TesselationControl || + program == Maxwell::ShaderProgram::TesselationEval) { continue; } - Shader* const shader = shader_cache.GetStageProgram(program); - - if (device.UseAssemblyShaders()) { - // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this - // all stages share the same bindings. - const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); - ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); - num_ssbos += num_stage_ssbos; - } + Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - // Stage indices are 0 - 5 - const std::size_t stage = index == 0 ? 0 : index - 1; - SetupDrawConstBuffers(stage, shader); - SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader); - SetupDrawImages(stage, shader); - - const GLuint program_handle = shader->GetHandle(); + const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: @@ -370,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_config.enable.Value(), shader_config.offset); } + // Stage indices are 0 - 5 + const std::size_t stage = index == 0 ? 0 : index - 1; + SetupDrawConstBuffers(stage, shader); + SetupDrawGlobalMemory(stage, shader); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); + // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the @@ -731,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { current_cbuf = 0; auto kernel = shader_cache.GetComputeKernel(code_addr); + program_manager.BindCompute(kernel->GetHandle()); + SetupComputeTextures(kernel); SetupComputeImages(kernel); @@ -745,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -1005,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { + static constexpr std::array TARGET_LUT = { + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& entries{shader->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); - u32 binding = - device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : shader->GetEntries().global_memory_entries) { + const bool assembly_shaders = device.UseAssemblyShaders(); + u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; + for (const auto& entry : entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; const u32 size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (assembly_shaders) { + UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& entries{kernel->GetEntries().global_memory_entries}; + + std::array<GLuint64EXT, 32> pointers; + ASSERT(entries.size() < pointers.size()); u32 binding = 0; - for (const auto& entry : kernel->GetEntries().global_memory_entries) { - const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const auto gpu_addr{memory_manager.Read<u64>(addr)}; - const auto size{memory_manager.Read<u32>(addr + 8)}; - SetupGlobalMemory(binding++, entry, gpu_addr, size); + for (const auto& entry : entries) { + const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; + const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; + const u32 size{memory_manager.Read<u32>(addr + 8)}; + SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); + ++binding; + } + if (device.UseAssemblyShaders()) { + UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); } } void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, - GPUVAddr gpu_addr, std::size_t size) { - const auto alignment{device.GetShaderStorageBufferAlignment()}; + GPUVAddr gpu_addr, std::size_t size, + GLuint64EXT* pointer) { + const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, - static_cast<GLsizeiptr>(size)); + if (device.UseAssemblyShaders()) { + *pointer = info.address + info.offset; + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, + static_cast<GLsizeiptr>(size)); + } } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..ccc6f50f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -33,6 +33,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core { @@ -91,6 +92,14 @@ public: return num_queued_commands > 0; } + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -115,9 +124,9 @@ private: /// Configures the current global memory entries to use for the kernel invocation. void SetupComputeGlobalMemory(Shader* kernel); - /// Configures a constant buffer. + /// Configures a global memory buffer. void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, - std::size_t size); + std::size_t size, GLuint64EXT* pointer); /// Configures the current textures to use for the draw command. void SetupDrawTextures(std::size_t stage_index, Shader* shader); @@ -242,6 +251,7 @@ private: ScreenInfo& screen_info; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..b05cb641c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -177,6 +177,12 @@ public: Release(); } + OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..f469ed656 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace OpenGL { @@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } +std::unordered_set<GLenum> GetSupportedFormats() { + GLint num_formats; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector<GLint> formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { + supported_formats.insert(static_cast<GLenum>(format)); + } + return supported_formats; +} + +} // Anonymous namespace + ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, - bool hint_retrievable = false) { + const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); @@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { + ProgramSharedPtr program_, bool is_built) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, + is_built(is_built) { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; } - ASSERT(handle != 0); + if (is_built) { + ASSERT(handle != 0); + } } Shader::~Shader() = default; @@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const { return handle; } -std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode code, ProgramCode code_b) { +bool Shader::IsBuilt() const { + return is_built; +} + +void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { + program->source_program = std::move(new_program); + handle = program->source_program.handle; + is_built = true; +} + +void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { + program->assembly_program = std::move(new_program); + handle = program->assembly_program.handle; + is_built = true; +} + +std::unique_ptr<Shader> Shader::CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); + if (!async_shaders.IsShaderAsync(params.system.GPU()) || + !params.device.UseAsynchronousShaders()) { + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); + // } + auto program = + BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + gpu.ShaderNotify().MarkShaderComplete(); + + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, shader_type), + std::move(program), true)); + } else { + // Required for entries + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + auto entries = MakeEntries(params.device, ir, shader_type); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); + async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, + std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, + COMPILER_SETTINGS, *registry, cpu_addr); - return std::unique_ptr<Shader>(new Shader( - std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); + auto program = std::make_shared<ProgramHandle>(); + return std::unique_ptr<Shader>( + new Shader(std::move(registry), std::move(entries), std::move(program), false)); + } } std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto& engine = params.system.GPU().KeplerCompute(); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto& engine = gpu.KeplerCompute(); auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; @@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); + gpu.ShaderNotify().MarkShaderComplete(); + return std::unique_ptr<Shader>(new Shader(std::move(registry), MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); @@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { - return last_shaders[static_cast<std::size_t>(program)]; + auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; + if (last_shader->IsBuilt()) { + return last_shader; + } } auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr address{GetShaderAddress(system, program)}; + if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { + auto completed_work = async_shaders.GetCompletedWork(); + for (auto& work : completed_work) { + Shader* shader = TryGet(work.cpu_address); + auto& gpu = system.GPU(); + gpu.ShaderNotify().MarkShaderComplete(); + if (shader == nullptr) { + continue; + } + using namespace VideoCommon::Shader; + if (work.backend == AsyncShaders::Backend::OpenGL) { + shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } + + ShaderDiskCacheEntry entry; + entry.type = work.shader_type; + entry.code = std::move(work.code); + entry.code_b = std::move(work.code_b); + entry.unique_identifier = work.uid; + + auto& registry = shader->GetRegistry(); + + entry.bound_buffer = registry.GetBoundBuffer(); + entry.graphics_info = registry.GetGraphicsInfo(); + entry.keys = registry.GetKeys(); + entry.bound_samplers = registry.GetBoundSamplers(); + entry.bindless_samplers = registry.GetBindlessSamplers(); + disk_cache.SaveEntry(std::move(entry)); + } + } + // Look up shader in the cache based on address const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { @@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), + async_shaders, cpu_addr.value_or(0)); } else { shader = Shader::CreateFromCache(params, found->second); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..7528ac686 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -33,6 +33,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class AsyncShaders; +} + namespace OpenGL { class Device; @@ -61,6 +65,11 @@ struct ShaderParameters { u64 unique_identifier; }; +ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, + u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + bool hint_retrievable = false); + class Shader final { public: ~Shader(); @@ -68,15 +77,28 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; + bool IsBuilt() const; + /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, - ProgramCode program_code_b); + const VideoCommon::Shader::Registry& GetRegistry() const { + return *registry; + } + + /// Mark a OpenGL shader as built + void AsyncOpenGLBuilt(OGLProgram new_program); + + /// Mark a GLASM shader as built + void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + + static std::unique_ptr<Shader> CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b, + VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); @@ -85,12 +107,13 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program); + ProgramSharedPtr program, bool is_built = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; ProgramSharedPtr program; GLuint handle = 0; + bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { @@ -104,7 +127,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders); /// Gets a compute kernel in the passed address Shader* GetComputeKernel(GPUVAddr code_addr); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -11,8 +11,30 @@ namespace OpenGL { -ProgramManager::ProgramManager(const Device& device) { - use_assembly_programs = device.UseAssemblyShaders(); +namespace { + +void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { + if (current == old) { + return; + } + if (current == 0) { + if (enabled) { + enabled = false; + glDisable(stage); + } + return; + } + if (!enabled) { + enabled = true; + glEnable(stage); + } + glBindProgramARB(stage, current); +} + +} // Anonymous namespace + +ProgramManager::ProgramManager(const Device& device) + : use_assembly_programs{device.UseAssemblyShaders()} { if (use_assembly_programs) { glEnable(GL_COMPUTE_PROGRAM_NV); } else { @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) { } void ProgramManager::BindGraphicsPipeline() { - if (use_assembly_programs) { - UpdateAssemblyPrograms(); - } else { + if (!use_assembly_programs) { UpdateSourcePrograms(); } } @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() { } } -void ProgramManager::UpdateAssemblyPrograms() { - const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); - }; +void ProgramManager::UseVertexShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); + } + current_state.vertex = program; +} - update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, - old_state.geometry); - update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, - old_state.fragment); +void ProgramManager::UseGeometryShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); + } + current_state.geometry = program; +} - old_state = current_state; +void ProgramManager::UseFragmentShader(GLuint program) { + if (use_assembly_programs) { + BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); + } + current_state.fragment = program; } void ProgramManager::UpdateSourcePrograms() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,17 +45,9 @@ public: /// Rewinds BindHostPipeline state changes. void RestoreGuestPipeline(); - void UseVertexShader(GLuint program) { - current_state.vertex = program; - } - - void UseGeometryShader(GLuint program) { - current_state.geometry = program; - } - - void UseFragmentShader(GLuint program) { - current_state.fragment = program; - } + void UseVertexShader(GLuint program); + void UseGeometryShader(GLuint program); + void UseFragmentShader(GLuint program); private: struct PipelineState { @@ -64,9 +56,6 @@ private: GLuint fragment = 0; }; - /// Update NV_gpu_program5 programs. - void UpdateAssemblyPrograms(); - /// Update GLSL programs. void UpdateSourcePrograms(); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver mapped_ptr = static_cast<u8*>( glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); - if (device.HasVertexBufferUnifiedMemory()) { + if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 61505879b..0a7bc9e2b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -41,91 +41,103 @@ struct FormatTuple { }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 - {GL_COMPRESSED_RED_RGTC1}, // DXN1 - {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT }}; const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { @@ -178,10 +190,10 @@ GLint GetSwizzleSource(SwizzleSource source) { GLenum GetComponent(PixelFormat format, bool is_first) { switch (format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; - case PixelFormat::S8Z24: + case PixelFormat::S8_UINT_D24_UNORM: return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; default: UNREACHABLE(); @@ -482,9 +494,9 @@ GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_sou std::array swizzle{x_source, y_source, z_source, w_source}; switch (const PixelFormat format = GetSurfaceParams().pixel_format) { - case PixelFormat::Z24S8: - case PixelFormat::Z32FS8: - case PixelFormat::S8Z24: + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, GetComponent(format, x_source == SwizzleSource::R)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e66cdc083..52e9e8250 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -535,12 +535,12 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, GLint internal_format; switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: internal_format = GL_RGBA8; texture.gl_format = GL_RGBA; texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; break; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: internal_format = GL_RGB565; texture.gl_format = GL_RGB; texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index d7f1ae89f..f8c77f4fa 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -117,90 +117,101 @@ struct FormatTuple { VkFormat format; ///< Vulkan format int usage = 0; ///< Describes image format usage } constexpr tex_format_tuples[] = { - {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U - {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S - {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI - {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U - {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U - {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) - {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U - {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI - {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F - {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U - {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S - {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI - {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F - {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI - {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 - {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 - {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 - {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 - {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM - {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM - {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U - {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 - {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 - {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 - {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 - {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F - {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F - {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F - {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F - {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U - {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI - {VK_FORMAT_UNDEFINED}, // R16I - {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 - {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F - {VK_FORMAT_UNDEFINED}, // RG16UI - {VK_FORMAT_UNDEFINED}, // RG16I - {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S - {VK_FORMAT_UNDEFINED}, // RGB32F - {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB - {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U - {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S - {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI - {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI - {VK_FORMAT_UNDEFINED}, // RGBX16F - {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI - {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I - {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 - {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 - {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB - {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB - {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB - {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB - {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB - {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U - {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB - {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB - {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB - {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB - {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 - {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB - {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 - {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB - {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 - {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB - {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 - {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB - {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 - {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB - {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 - {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB - {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 - {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB - {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F + {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // A8B8G8R8_UNORM + {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // A8B8G8R8_SNORM + {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT + {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT + {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM + {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM + {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM + {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT + {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) + {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM + {VK_FORMAT_R8_SNORM, Attachable | Storage}, // R8_SNORM + {VK_FORMAT_R8_SINT, Attachable | Storage}, // R8_SINT + {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8_UINT + {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // R16G16B16A16_FLOAT + {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // R16G16B16A16_UNORM + {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // R16G16B16A16_SNORM + {VK_FORMAT_R16G16B16A16_SINT, Attachable | Storage}, // R16G16B16A16_SINT + {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // R16G16B16A16_UINT + {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // B10G11R11_FLOAT + {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // R32G32B32A32_UINT + {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // BC1_RGBA_UNORM + {VK_FORMAT_BC2_UNORM_BLOCK}, // BC2_UNORM + {VK_FORMAT_BC3_UNORM_BLOCK}, // BC3_UNORM + {VK_FORMAT_BC4_UNORM_BLOCK}, // BC4_UNORM + {VK_FORMAT_BC4_SNORM_BLOCK}, // BC4_SNORM + {VK_FORMAT_BC5_UNORM_BLOCK}, // BC5_UNORM + {VK_FORMAT_BC5_SNORM_BLOCK}, // BC5_SNORM + {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7_UNORM + {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UFLOAT + {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SFLOAT + {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4_UNORM + {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // B8G8R8A8_UNORM + {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // R32G32B32A32_FLOAT + {VK_FORMAT_R32G32B32A32_SINT, Attachable | Storage}, // R32G32B32A32_SINT + {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // R32G32_FLOAT + {VK_FORMAT_R32G32_SINT, Attachable | Storage}, // R32G32_SINT + {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT + {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT + {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM + {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT + {VK_FORMAT_UNDEFINED}, // R16_SINT + {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM + {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT + {VK_FORMAT_UNDEFINED}, // R16G16_UINT + {VK_FORMAT_UNDEFINED}, // R16G16_SINT + {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM + {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT + {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB + {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM + {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM + {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT + {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // R8G8_UINT + {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // R32G32_UINT + {VK_FORMAT_UNDEFINED}, // R16G16B16X16_FLOAT + {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32_UINT + {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32_SINT + {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5_UNORM + {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4_UNORM + {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // B8G8R8A8_SRGB + {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // BC1_RGBA_SRGB + {VK_FORMAT_BC2_SRGB_BLOCK}, // BC2_SRGB + {VK_FORMAT_BC3_SRGB_BLOCK}, // BC3_SRGB + {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7_SRGB + {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // A4B4G4R4_UNORM + {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB + {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB + {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB + {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB + {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5_UNORM + {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB + {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8_UNORM + {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB + {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6_UNORM + {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB + {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10_UNORM + {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB + {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12_UNORM + {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB + {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6_UNORM + {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB + {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM + {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB + {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT // Depth formats - {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F - {VK_FORMAT_D16_UNORM, Attachable}, // Z16 + {VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT + {VK_FORMAT_D16_UNORM, Attachable}, // D16_UNORM // DepthStencil formats - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 - {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) - {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // D24_UNORM_S8_UINT + {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8_UINT_D24_UNORM (emulated) + {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // D32_FLOAT_S8_UINT }; static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); @@ -221,7 +232,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; } - // Use ABGR8 on hardware that doesn't support ASTC natively + // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 866813465..ce53e5a6b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -187,9 +187,9 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { switch (framebuffer.pixel_format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Tegra::FramebufferConfig::PixelFormat::RGB565: + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: return VK_FORMAT_R5G6B5_UNORM_PACK16; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2be38d419..1d2f8b557 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(size); - ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(size), + .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer.handle = device.GetLogical().CreateBuffer(ci); buffer.commit = memory_manager.Commit(buffer.handle, false); @@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barrier, {}); }); @@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const VkBuffer handle = Handle(); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index da71e710c..182461ed9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -115,32 +115,32 @@ constexpr u8 quad_array[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - VkDescriptorSetLayoutBinding binding; - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = nullptr; - return binding; + return { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; } VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - VkPushConstantRange range; - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = static_cast<u32>(size); - return range; + return { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), + }; } // Uint8 SPIR-V module. Generated from the "shaders/" directory. @@ -344,29 +344,33 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - std::array<VkDescriptorSetLayoutBinding, 2> bindings; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].pImmutableSamplers = nullptr; - return bindings; + return {{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + }}; } VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 2; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } } // Anonymous namespace @@ -376,37 +380,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, const u8* code) { - VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; - descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout_ci.pNext = nullptr; - descriptor_layout_ci.flags = 0; - descriptor_layout_ci.bindingCount = bindings.size(); - descriptor_layout_ci.pBindings = bindings.data(); - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); - - VkPipelineLayoutCreateInfo pipeline_layout_ci; - pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_ci.pNext = nullptr; - pipeline_layout_ci.flags = 0; - pipeline_layout_ci.setLayoutCount = 1; - pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); - pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); - pipeline_layout_ci.pPushConstantRanges = push_constants.data(); - layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }); + + layout = device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }); if (!templates.empty()) { - VkDescriptorUpdateTemplateCreateInfoKHR template_ci; - template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - template_ci.pNext = nullptr; - template_ci.flags = 0; - template_ci.descriptorUpdateEntryCount = templates.size(); - template_ci.pDescriptorUpdateEntries = templates.data(); - template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - template_ci.descriptorSetLayout = *descriptor_set_layout; - template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - template_ci.pipelineLayout = *layout; - template_ci.set = 0; - descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = templates.size(), + .pDescriptorUpdateEntries = templates.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = 0, + }); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } @@ -414,32 +418,32 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - VkShaderModuleCreateInfo module_ci; - module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - module_ci.pNext = nullptr; - module_ci.flags = 0; - module_ci.codeSize = code_size; - module_ci.pCode = code_copy.get(); - module = device.GetLogical().CreateShaderModule(module_ci); - - VkComputePipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.layout = *layout; - pipeline_ci.basePipelineHandle = nullptr; - pipeline_ci.basePipelineIndex = 0; - - VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + module = device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = code_copy.get(), + }); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 281bf9ac3..ed9d2991c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = 1; - entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); } }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); @@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(bindings.size()); - ci.pBindings = bindings.data(); - return device.GetLogical().CreateDescriptorSetLayout(ci); + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); } vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; - return device.GetLogical().CreatePipelineLayout(ci); + return device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); } vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { @@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }); } vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { device.SaveShader(code); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code.size() * sizeof(u32); - ci.pCode = code.data(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code.size() * sizeof(u32), + .pCode = code.data(), + }); } vk::Pipeline VKComputePipeline::CreatePipeline() const { - VkComputePipelineCreateInfo ci; - VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *shader_module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + VkComputePipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; + + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - stage_ci.pNext = &subgroup_size_ci; + ci.stage.pNext = &subgroup_size_ci; } - ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.layout = *layout; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; return device.GetLogical().CreateComputePipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 9259b618d..ac4a0884e 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -43,27 +43,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = num_sets; - ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); - ci.pPoolSizes = std::data(pool_sizes); + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, + }; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = num_sets, + .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .pPoolSizes = std::data(pool_sizes), + }; return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count) { const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = **active_pool; - ai.descriptorSetCount = static_cast<u32>(count); - ai.pSetLayouts = layout_copies.data(); + VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = **active_pool, + .descriptorSetCount = static_cast<u32>(count), + .pSetLayouts = layout_copies.data(), + }; vk::DescriptorSets sets = active_pool->Allocate(ai); if (!sets.IsOutOfPoolMemory()) { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9226e591c..6245e0d78 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -84,14 +84,19 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_A8B8G8R8_UNORM_PACK32, VK_FORMAT_A8B8G8R8_UINT_PACK32, VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, VK_FORMAT_B5G6R5_UNORM_PACK16, VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A2B10G10R10_UINT_PACK32, VK_FORMAT_A1R5G5B5_UNORM_PACK16, VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32G32B32A32_UINT, VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R16G16B16A16_UINT, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UNORM, @@ -103,8 +108,11 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_SINT, VK_FORMAT_R8G8_UINT, VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_SNORM, + VK_FORMAT_R8_SINT, VK_FORMAT_R8_UINT, VK_FORMAT_B10G11R11_UFLOAT_PACK32, VK_FORMAT_R32_SFLOAT, @@ -124,6 +132,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, @@ -757,14 +766,14 @@ std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const queue_cis.reserve(unique_queue_families.size()); for (const u32 queue_family : unique_queue_families) { - queue_cis.push_back({ + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = nullptr, .flags = 0, .queueFamilyIndex = queue_family, - .queueCount = 1, - .pQueuePriorities = &QUEUE_PRIORITY, }); + ci.queueCount = 1; + ci.pQueuePriorities = &QUEUE_PRIORITY; } return queue_cis; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 9bceb3861..1c418ea17 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - VkImageViewCreateInfo image_view_ci; - image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_ci.pNext = nullptr; - image_view_ci.flags = 0; - image_view_ci.image = *image; - image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_ci.format = format; - image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - image_view_ci.subresourceRange.aspectMask = aspect_mask; - image_view_ci.subresourceRange.baseMipLevel = 0; - image_view_ci.subresourceRange.levelCount = 1; - image_view_ci.subresourceRange.baseArrayLayer = 0; - image_view_ci.subresourceRange.layerCount = 1; - present_view = device.GetLogical().CreateImageView(image_view_ci); + present_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index b4c650a63..24c8960ac 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t }(); // Try to allocate found type. - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = size; - memory_ai.memoryTypeIndex = type; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); if (!memory) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3da835324..42b3a744c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -88,12 +88,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi // Combined image samplers can be arrayed. count = container[i].size; } - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = count; - entry.stageFlags = stage_flags; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = count, + .stageFlags = stage_flags, + .pImmutableSamplers = nullptr, + }); } } @@ -259,10 +260,10 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } } - Specialization specialization; - specialization.workgroup_size = key.workgroup_size; - specialization.shared_memory_size = key.shared_memory_size; - + const Specialization specialization{ + .workgroup_size = key.workgroup_size, + .shared_memory_size = key.shared_memory_size, + }; const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, shader->GetRegistry(), specialization), shader->GetEntries()}; @@ -370,13 +371,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].size; - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = num_samplers; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = num_samplers, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); ++binding; offset += num_samplers * entry_size; @@ -389,22 +391,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding + i; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = descriptor_type; - entry.offset = static_cast<std::size_t>(offset + i * entry_size); - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding + i, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = descriptor_type, + .offset = static_cast<std::size_t>(offset + i * entry_size), + .stride = entry_size, + }); } } else if (count > 0) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = count; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = count, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); } offset += count * entry_size; binding += count; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index bc91c48cc..6cd63d090 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -47,14 +47,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - VkQueryPoolCreateInfo query_pool_ci; - query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - query_pool_ci.pNext = nullptr; - query_pool_ci.flags = 0; - query_pool_ci.queryType = GetTarget(type); - query_pool_ci.queryCount = static_cast<u32>(end - begin); - query_pool_ci.pipelineStatistics = 0; - pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); + pools.push_back(device->GetLogical().CreateQueryPool({ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queryType = GetTarget(type), + .queryCount = static_cast<u32>(end - begin), + .pipelineStatistics = 0, + })); } void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7625871c2..31e44aa2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -64,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - VkViewport viewport; - viewport.x = src.translate_x - src.scale_x; - viewport.y = src.translate_y - src.scale_y; - viewport.width = width != 0.0f ? width : 1.0f; - viewport.height = height != 0.0f ? height : 1.0f; + VkViewport viewport{ + .x = src.translate_x - src.scale_x, + .y = src.translate_y - src.scale_y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = src.translate_z - src.scale_z * reduce_z, + .maxDepth = src.translate_z + src.scale_z, + }; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - viewport.minDepth = src.translate_z - src.scale_z * reduce_z; - viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } + return viewport; } @@ -508,10 +510,11 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attachment.colorAttachment = color_attachment; - attachment.clearValue = clear_value; + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -551,13 +554,16 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - ComputePipelineCacheKey key; - key.shader = code_addr; - key.shared_memory_size = launch_desc.shared_alloc; - key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z}; - - auto& pipeline = pipeline_cache.GetComputePipeline(key); + auto& pipeline = pipeline_cache.GetComputePipeline({ + .shader = code_addr, + .shared_memory_size = launch_desc.shared_alloc, + .workgroup_size = + { + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, + }); // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); @@ -841,17 +847,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - VkFramebufferCreateInfo framebuffer_ci; - framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_ci.pNext = nullptr; - framebuffer_ci.flags = 0; - framebuffer_ci.renderPass = key.renderpass; - framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); - framebuffer_ci.pAttachments = key.views.data(); - framebuffer_ci.width = key.width; - framebuffer_ci.height = key.height; - framebuffer_ci.layers = key.layers; - framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + framebuffer = device.GetLogical().CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = key.renderpass, + .attachmentCount = static_cast<u32>(key.views.size()), + .pAttachments = key.views.data(), + .width = key.width, + .height = key.height, + .layers = key.layers, + }); } return {*framebuffer, VkExtent2D{key.width, key.height}}; @@ -1553,17 +1559,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { return *default_buffer; } - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = DEFAULT_BUFFER_SIZE; - ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = DEFAULT_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); default_buffer_commit = memory_manager.Commit(default_buffer, false); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 3f71d005e..80284cf92 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { using namespace VideoCore::Surface; + const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); + std::vector<VkAttachmentDescription> descriptors; + descriptors.reserve(num_attachments); + std::vector<VkAttachmentReference> color_references; + color_references.reserve(num_attachments); - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); @@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - descriptor.initialLayout = color_layout; - descriptor.finalLayout = color_layout; - - VkAttachmentReference& reference = color_references.emplace_back(); - reference.attachment = static_cast<u32>(rt); - reference.layout = color_layout; + descriptors.push_back({ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = color_layout, + .finalLayout = color_layout, + }); + + color_references.push_back({ + .attachment = static_cast<u32>(rt), + .layout = color_layout, + }); } VkAttachmentReference zeta_attachment_ref; @@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout zeta_layout = params.zeta_texception != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = 0; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.initialLayout = zeta_layout; - descriptor.finalLayout = zeta_layout; - - zeta_attachment_ref.attachment = static_cast<u32>(num_attachments); - zeta_attachment_ref.layout = zeta_layout; + descriptors.push_back({ + .flags = 0, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = zeta_layout, + .finalLayout = zeta_layout, + }); + + zeta_attachment_ref = { + .attachment = static_cast<u32>(num_attachments), + .layout = zeta_layout, + }; } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); - subpass_description.pColorAttachments = color_references.data(); - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(color_references.size()), + .pColorAttachments = color_references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; VkAccessFlags access = 0; VkPipelineStageFlags stage = 0; @@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - VkSubpassDependency subpass_dependency; - subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - subpass_dependency.dstSubpass = 0; - subpass_dependency.srcStageMask = stage; - subpass_dependency.dstStageMask = stage; - subpass_dependency.srcAccessMask = 0; - subpass_dependency.dstAccessMask = access; - subpass_dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.attachmentCount = static_cast<u32>(descriptors.size()); - ci.pAttachments = descriptors.data(); - ci.subpassCount = 1; - ci.pSubpasses = &subpass_description; - ci.dependencyCount = 1; - ci.pDependencies = &subpass_dependency; - return device.GetLogical().CreateRenderPass(ci); + const VkSubpassDependency subpass_dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = stage, + .dstStageMask = stage, + .srcAccessMask = 0, + .dstAccessMask = access, + .dependencyFlags = 0, + }; + + return device.GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptors.size()), + .pAttachments = descriptors.data(), + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &subpass_dependency, + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index dc06f545a..f19330a36 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -18,33 +18,32 @@ namespace { constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; -VkFenceCreateInfo BuildFenceCreateInfo() { - VkFenceCreateInfo fence_ci; - fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_ci.pNext = nullptr; - fence_ci.flags = 0; - return fence_ci; +constexpr VkFenceCreateInfo BuildFenceCreateInfo() { + return { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; } } // Anonymous namespace class CommandBufferPool final : public VKFencedPool { public: - CommandBufferPool(const VKDevice& device) + explicit CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { // Command buffers are going to be commited, recorded, executed every single usage cycle. // They are also going to be reseted when commited. - VkCommandPoolCreateInfo command_pool_ci; - command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_ci.pNext = nullptr; - command_pool_ci.flags = - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 616eacc36..2d5460776 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -44,32 +44,35 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); const std::array color = tsc.GetBorderColor(); - VkSamplerCustomBorderColorCreateInfoEXT border; - border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - border.pNext = nullptr; - border.format = VK_FORMAT_UNDEFINED; + VkSamplerCustomBorderColorCreateInfoEXT border{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .format = VK_FORMAT_UNDEFINED, + }; std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = arbitrary_borders ? &border : nullptr; - ci.flags = 0; - ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); - ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); - ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); - ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); - ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); - ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); - ci.mipLodBias = tsc.GetLodBias(); - ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; - ci.maxAnisotropy = tsc.GetMaxAnisotropy(); - ci.compareEnable = tsc.depth_compare_enabled; - ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); - ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); - ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); - ci.unnormalizedCoordinates = VK_FALSE; - return device.GetLogical().CreateSampler(ci); + return device.GetLogical().CreateSampler({ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = arbitrary_borders ? &border : nullptr, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.GetLodBias(), + .anisotropyEnable = + static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = tsc.GetMaxAnisotropy(), + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); } VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 56524e6f3..dbbd0961a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -100,16 +100,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame state.framebuffer = framebuffer; state.render_area = render_area; - VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = render_area; - renderpass_bi.clearValueCount = 0; - renderpass_bi.pClearValues = nullptr; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { @@ -157,16 +160,17 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = current_cmdbuf.address(); - submit_info.signalSemaphoreCount = semaphore ? 1 : 0; - submit_info.pSignalSemaphores = &semaphore; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = current_cmdbuf.address(), + .signalSemaphoreCount = semaphore ? 1U : 0U, + .pSignalSemaphores = &semaphore, + }; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { case VK_SUCCESS: break; @@ -181,19 +185,18 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { void VKScheduler::AllocateNewContext() { ++ticks; - VkCommandBufferBeginInfo cmdbuf_bi; - cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmdbuf_bi.pNext = nullptr; - cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmdbuf_bi.pInheritanceInfo = nullptr; - std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), device.GetDispatchLoader()); - current_cmdbuf.Begin(cmdbuf_bi); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 112df9c71..c1a218d76 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code_size; - ci.pCode = data.get(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = data.get(), + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 45c180221..5eca0ab91 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -71,20 +71,19 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = 1ULL << log2; - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->handle = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 1ULL << log2, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 2d28a6c47..a5526a3f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -122,30 +122,27 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; - - VkBufferCreateInfo buffer_ci; - buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_ci.pNext = nullptr; - buffer_ci.flags = 0; - buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); - buffer_ci.usage = usage; - buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_ci.queueFamilyIndexCount = 0; - buffer_ci.pQueueFamilyIndices = nullptr; - - buffer = device.GetLogical().CreateBuffer(buffer_ci); + buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); const u32 required_flags = requirements.memoryTypeBits; stream_buffer_size = static_cast<u64>(requirements.size); - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - - memory = device.GetLogical().AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index bffd8f32a..c25e312b6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -95,15 +95,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; - present_info.pWaitSemaphores = semaphores.data(); - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchain.address(); - present_info.pImageIndices = &image_index; - present_info.pResults = nullptr; + const VkPresentInfoKHR present_info{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = render_semaphore ? 2U : 1U, + .pWaitSemaphores = semaphores.data(), + .swapchainCount = 1, + .pSwapchains = swapchain.address(), + .pImageIndices = &image_index, + .pResults = nullptr, + }; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -147,24 +148,25 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci; - swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchain_ci.pNext = nullptr; - swapchain_ci.flags = 0; - swapchain_ci.surface = surface; - swapchain_ci.minImageCount = requested_image_count; - swapchain_ci.imageFormat = surface_format.format; - swapchain_ci.imageColorSpace = surface_format.colorSpace; - swapchain_ci.imageArrayLayers = 1; - swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_ci.queueFamilyIndexCount = 0; - swapchain_ci.pQueueFamilyIndices = nullptr; - swapchain_ci.preTransform = capabilities.currentTransform; - swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - swapchain_ci.presentMode = present_mode; - swapchain_ci.clipped = VK_FALSE; - swapchain_ci.oldSwapchain = nullptr; + VkSwapchainCreateInfoKHR swapchain_ci{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .surface = surface, + .minImageCount = requested_image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = present_mode, + .clipped = VK_FALSE, + .oldSwapchain = nullptr, + }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -173,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); - } else { - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. @@ -200,20 +200,28 @@ void VKSwapchain::CreateSemaphores() { } void VKSwapchain::CreateImageViews() { - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - // ci.image - ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ci.format = image_format; - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ci.subresourceRange.baseMipLevel = 0; - ci.subresourceRange.levelCount = 1; - ci.subresourceRange.baseArrayLayer = 0; - ci.subresourceRange.layerCount = 1; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bd93dcf20..d102e6d27 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) { vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - return device.GetLogical().CreateBuffer(ci); + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(host_memory_size), + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); } VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, @@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - VkBufferViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.buffer = buffer; - ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - ci.offset = 0; - ci.range = static_cast<VkDeviceSize>(host_memory_size); - return ci; + return { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = buffer, + .format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, + .offset = 0, + .range = static_cast<VkDeviceSize>(host_memory_size), + }; } VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -130,23 +132,23 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = SurfaceTargetToImage(params.target); - ci.format = format; - ci.mipLevels = params.num_levels; - ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_OPTIMAL; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = SurfaceTargetToImage(params.target), + .format = format, + .mipLevels = params.num_levels, + .arrayLayers = static_cast<u32>(params.GetNumLayers()), + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; if (attachable) { ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -233,7 +235,7 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { UNIMPLEMENTED_IF(params.IsBuffer()); - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); } @@ -321,22 +323,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { } VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - VkBufferImageCopy copy; - copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = image->GetAspectMask(); - copy.imageSubresource.mipLevel = level; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = params.GetMipWidth(level); - copy.imageExtent.height = params.GetMipHeight(level); - copy.imageExtent.depth = - params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - return copy; + return { + .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = image->GetAspectMask(), + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(params.GetNumLayers()), + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = params.GetMipWidth(level), + .height = params.GetMipHeight(level), + .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, + }, + }; } VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { @@ -380,7 +385,7 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; - if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5_UNORM) { // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. std::swap(swizzle[0], swizzle[2]); } @@ -392,11 +397,11 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); const bool is_first = x_source == SwizzleSource::R; switch (params.pixel_format) { - case VideoCore::Surface::PixelFormat::Z24S8: - case VideoCore::Surface::PixelFormat::Z32FS8: + case VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT: + case VideoCore::Surface::PixelFormat::D32_FLOAT_S8_UINT: aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; break; - case VideoCore::Surface::PixelFormat::S8Z24: + case VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM: aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; break; default: @@ -416,20 +421,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc ASSERT(num_slices == params.depth); } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.viewType = image_view_type; - ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; - ci.subresourceRange.aspectMask = aspect; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - image_view = device.GetLogical().CreateImageView(ci); + image_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = image_view_type, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = swizzle[0], + .g = swizzle[1], + .b = swizzle[2], + .a = swizzle[3], + }, + .subresourceRange = + { + .aspectMask = aspect, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = base_layer, + .layerCount = num_layers, + }, + }); return last_image_view = *image_view; } @@ -439,17 +453,26 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.format = surface.GetImage().GetFormat(); - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = aspect_mask; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .format = surface.GetImage().GetFormat(), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = base_level, + .levelCount = num_levels, + }, + }; if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; ci.subresourceRange.baseArrayLayer = base_slice; @@ -502,24 +525,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy copy; - copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); - copy.srcSubresource.mipLevel = copy_params.source_level; - copy.srcSubresource.baseArrayLayer = copy_params.source_z; - copy.srcSubresource.layerCount = num_layers; - copy.srcOffset.x = copy_params.source_x; - copy.srcOffset.y = copy_params.source_y; - copy.srcOffset.z = 0; - copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); - copy.dstSubresource.mipLevel = copy_params.dest_level; - copy.dstSubresource.baseArrayLayer = dst_base_layer; - copy.dstSubresource.layerCount = num_layers; - copy.dstOffset.x = copy_params.dest_x; - copy.dstOffset.y = copy_params.dest_y; - copy.dstOffset.z = dst_offset_z; - copy.extent.width = copy_params.width; - copy.extent.height = copy_params.height; - copy.extent.depth = extent_z; + const VkImageCopy copy{ + .srcSubresource = + { + .aspectMask = src_surface->GetAspectMask(), + .mipLevel = copy_params.source_level, + .baseArrayLayer = copy_params.source_z, + .layerCount = num_layers, + }, + .srcOffset = + { + .x = static_cast<s32>(copy_params.source_x), + .y = static_cast<s32>(copy_params.source_y), + .z = 0, + }, + .dstSubresource = + { + .aspectMask = dst_surface->GetAspectMask(), + .mipLevel = copy_params.dest_level, + .baseArrayLayer = dst_base_layer, + .layerCount = num_layers, + }, + .dstOffset = + { + .x = static_cast<s32>(copy_params.dest_x), + .y = static_cast<s32>(copy_params.dest_y), + .z = static_cast<s32>(dst_offset_z), + }, + .extent = + { + .width = copy_params.width, + .height = copy_params.height, + .depth = extent_z, + }, + }; const VkImage src_image = src_surface->GetImageHandle(); const VkImage dst_image = dst_surface->GetImageHandle(); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 051298cc8..14cac38ea 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -377,24 +377,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, InstanceDispatch& dld) noexcept { - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "yuzu Emulator"; - application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.pEngineName = "yuzu Emulator"; - application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.pApplicationInfo = &application_info; - ci.enabledLayerCount = layers.size(); - ci.ppEnabledLayerNames = layers.data(); - ci.enabledExtensionCount = extensions.size(); - ci.ppEnabledExtensionNames = extensions.data(); + static constexpr VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = VK_API_VERSION_1_1, + }; + + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; VkInstance instance; if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { @@ -425,19 +427,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( DebugCallback Instance::TryCreateDebugCallback( PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - VkDebugUtilsMessengerCreateInfoEXT ci; - ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - ci.pNext = nullptr; - ci.flags = 0; - ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - ci.pfnUserCallback = callback; - ci.pUserData = nullptr; + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; VkDebugUtilsMessengerEXT messenger; if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { @@ -468,12 +471,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - VkCommandBufferAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.commandPool = handle; - ai.level = level; - ai.commandBufferCount = static_cast<u32>(num_buffers); + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast<u32>(num_buffers), + }; std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { @@ -497,17 +501,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { - VkDeviceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = next; - ci.flags = 0; - ci.queueCreateInfoCount = queues_ci.size(); - ci.pQueueCreateInfos = queues_ci.data(); - ci.enabledLayerCount = 0; - ci.ppEnabledLayerNames = nullptr; - ci.enabledExtensionCount = enabled_extensions.size(); - ci.ppEnabledExtensionNames = enabled_extensions.data(); - ci.pEnabledFeatures = nullptr; + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; VkDevice device; if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { @@ -548,10 +553,11 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { } Semaphore Device::CreateSemaphore() const { - VkSemaphoreCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); @@ -639,10 +645,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons } Event Device::CreateEvent() const { - VkEventCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + VkEvent object; Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); return Event(object, handle, *dld); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..b7f66d7ee --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,181 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <condition_variable> +#include <mutex> +#include <thread> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { + +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} + +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers(std::size_t num_workers) { + // If we're already have workers queued or don't want to queue workers, ignore + if (num_workers == worker_threads.size() || num_workers == 0) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back(std::move( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + cv.notify_all(); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() { + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() { + std::shared_lock lock{completed_mutex}; + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<AsyncShaders::Result> results; + { + std::unique_lock lock{completed_mutex}; + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM + : AsyncShaders::Backend::OpenGL, + device, + shader_type, + uid, + std::move(code), + std::move(code_b), + main_offset, + compiler_settings, + registry, + cpu_addr}; + std::unique_lock lock(queue_mutex); + pending_queue.push_back(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + using namespace std::chrono_literals; + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + std::unique_lock lock{queue_mutex}; + cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); + if (is_thread_exiting) { + return; + } + + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + continue; + } + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop_front(); + + lock.unlock(); + + if (work.backend == AsyncShaders::Backend::OpenGL || + work.backend == AsyncShaders::Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == AsyncShaders::Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..2f5ee94ad --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,109 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <condition_variable> +#include <deque> +#include <memory> +#include <shared_mutex> +#include <thread> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(std::size_t num_workers); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check to see if any shaders have actually been compiled + bool HasCompletedWork(); + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + /// Check our worker queue to see if we have any work queued already + bool HasWorkQueued(); + + struct WorkerParams { + AsyncShaders::Backend backend; + OpenGL::Device device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + VideoCommon::Shader::CompilerSettings compiler_settings; + VideoCommon::Shader::Registry registry; + VAddr cpu_address; + }; + + std::condition_variable cv; + std::mutex queue_mutex; + std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::deque<WorkerParams> pending_queue; + std::vector<AsyncShaders::Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 07778dc3e..e75ca4fdb 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, std::size_t component) { const TextureFormat format{descriptor.format}; switch (format) { - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32_B32: - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: + case TextureFormat::R16G16B16A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R32G32B32: + case TextureFormat::R32G32: + case TextureFormat::R16G16: case TextureFormat::R32: case TextureFormat::R16: case TextureFormat::R8: @@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 0) { return descriptor.b_type; } @@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: if (component == 0) { return descriptor.g_type; @@ -137,15 +137,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { u32 GetComponentSize(TextureFormat format, std::size_t component) { switch (format) { - case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32G32B32A32: return 32; - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R16G16B16A16: return 16; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: return component <= 2 ? 32 : 0; - case TextureFormat::R32_G32: + case TextureFormat::R32G32: return component <= 1 ? 32 : 0; - case TextureFormat::R16_G16: + case TextureFormat::R16G16: return component <= 1 ? 16 : 0; case TextureFormat::R32: return component == 0 ? 32 : 0; @@ -192,7 +192,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: if (component == 1 || component == 2) { return 11; } @@ -200,7 +200,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 10; } return 0; - case TextureFormat::G8R24: + case TextureFormat::R24G8: if (component == 0) { return 8; } @@ -208,7 +208,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G24R8: + case TextureFormat::R8G24: if (component == 0) { return 8; } @@ -216,7 +216,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 24; } return 0; - case TextureFormat::G8R8: + case TextureFormat::R8G8: return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; @@ -231,25 +231,25 @@ std::size_t GetImageComponentMask(TextureFormat format) { constexpr u8 B = 0b0100; constexpr u8 A = 0b1000; switch (format) { - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32G32B32A32: + case TextureFormat::R16G16B16A16: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A4B4G4R4: case TextureFormat::A5B5G5R1: case TextureFormat::A1B5G5R5: return std::size_t{R | G | B | A}; - case TextureFormat::R32_G32_B32: + case TextureFormat::R32G32B32: case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: - case TextureFormat::BF10GF11RF11: + case TextureFormat::B10G11R11: return std::size_t{R | G | B}; - case TextureFormat::R32_G32: - case TextureFormat::R16_G16: - case TextureFormat::G8R24: - case TextureFormat::G24R8: - case TextureFormat::G8R8: + case TextureFormat::R32G32: + case TextureFormat::R16G16: + case TextureFormat::R24G8: + case TextureFormat::R8G24: + case TextureFormat::R8G8: case TextureFormat::G4R4: return std::size_t{R | G}; case TextureFormat::R32: diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp new file mode 100644 index 000000000..c3c71657d --- /dev/null +++ b/src/video_core/shader_notify.cpp @@ -0,0 +1,42 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader_notify.h" + +using namespace std::chrono_literals; + +namespace VideoCore { +namespace { +constexpr auto UPDATE_TICK = 32ms; +} + +ShaderNotify::ShaderNotify() = default; +ShaderNotify::~ShaderNotify() = default; + +std::size_t ShaderNotify::GetShadersBuilding() { + const auto now = std::chrono::high_resolution_clock::now(); + const auto diff = now - last_update; + if (diff > UPDATE_TICK) { + std::shared_lock lock(mutex); + last_updated_count = accurate_count; + } + return last_updated_count; +} + +std::size_t ShaderNotify::GetShadersBuildingAccurate() { + std::shared_lock lock{mutex}; + return accurate_count; +} + +void ShaderNotify::MarkShaderComplete() { + std::unique_lock lock{mutex}; + accurate_count--; +} + +void ShaderNotify::MarkSharderBuilding() { + std::unique_lock lock{mutex}; + accurate_count++; +} + +} // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h new file mode 100644 index 000000000..a9c92d179 --- /dev/null +++ b/src/video_core/shader_notify.h @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <shared_mutex> +#include "common/common_types.h" + +namespace VideoCore { +class ShaderNotify { +public: + ShaderNotify(); + ~ShaderNotify(); + + std::size_t GetShadersBuilding(); + std::size_t GetShadersBuildingAccurate(); + + void MarkShaderComplete(); + void MarkSharderBuilding(); + +private: + std::size_t last_updated_count{}; + std::size_t accurate_count{}; + std::shared_mutex mutex; + std::chrono::high_resolution_clock::time_point last_update{}; +}; +} // namespace VideoCore diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index bbe93903c..1688267bb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -74,117 +74,131 @@ bool SurfaceTargetIsArray(SurfaceTarget target) { PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { switch (format) { - case Tegra::DepthFormat::S8_Z24_UNORM: - return PixelFormat::S8Z24; - case Tegra::DepthFormat::Z24_S8_UNORM: - return PixelFormat::Z24S8; - case Tegra::DepthFormat::Z32_FLOAT: - return PixelFormat::Z32F; - case Tegra::DepthFormat::Z16_UNORM: - return PixelFormat::Z16; - case Tegra::DepthFormat::Z32_S8_X24_FLOAT: - return PixelFormat::Z32FS8; + case Tegra::DepthFormat::S8_UINT_Z24_UNORM: + return PixelFormat::S8_UINT_D24_UNORM; + case Tegra::DepthFormat::D24S8_UNORM: + return PixelFormat::D24_UNORM_S8_UINT; + case Tegra::DepthFormat::D32_FLOAT: + return PixelFormat::D32_FLOAT; + case Tegra::DepthFormat::D16_UNORM: + return PixelFormat::D16_UNORM; + case Tegra::DepthFormat::D32_FLOAT_S8X24_UINT: + return PixelFormat::D32_FLOAT_S8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::S8Z24; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); + return PixelFormat::S8_UINT_D24_UNORM; } } PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { - case Tegra::RenderTargetFormat::RGBA8_SRGB: - return PixelFormat::RGBA8_SRGB; - case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8U; - case Tegra::RenderTargetFormat::RGBA8_SNORM: - return PixelFormat::ABGR8S; - case Tegra::RenderTargetFormat::RGBA8_UINT: - return PixelFormat::ABGR8UI; - case Tegra::RenderTargetFormat::BGRA8_SRGB: - return PixelFormat::BGRA8_SRGB; - case Tegra::RenderTargetFormat::BGRA8_UNORM: - return PixelFormat::BGRA8; - case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10U; - case Tegra::RenderTargetFormat::RGBA16_FLOAT: - return PixelFormat::RGBA16F; - case Tegra::RenderTargetFormat::RGBA16_UNORM: - return PixelFormat::RGBA16U; - case Tegra::RenderTargetFormat::RGBA16_SNORM: - return PixelFormat::RGBA16S; - case Tegra::RenderTargetFormat::RGBA16_UINT: - return PixelFormat::RGBA16UI; - case Tegra::RenderTargetFormat::RGBA32_FLOAT: - return PixelFormat::RGBA32F; - case Tegra::RenderTargetFormat::RG32_FLOAT: - return PixelFormat::RG32F; - case Tegra::RenderTargetFormat::R11G11B10_FLOAT: - return PixelFormat::R11FG11FB10F; - case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5U; - case Tegra::RenderTargetFormat::BGR5A1_UNORM: - return PixelFormat::A1B5G5R5U; - case Tegra::RenderTargetFormat::RGBA32_UINT: - return PixelFormat::RGBA32UI; - case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8U; - case Tegra::RenderTargetFormat::R8_UINT: - return PixelFormat::R8UI; - case Tegra::RenderTargetFormat::RG16_FLOAT: - return PixelFormat::RG16F; - case Tegra::RenderTargetFormat::RG16_UINT: - return PixelFormat::RG16UI; - case Tegra::RenderTargetFormat::RG16_SINT: - return PixelFormat::RG16I; - case Tegra::RenderTargetFormat::RG16_UNORM: - return PixelFormat::RG16; - case Tegra::RenderTargetFormat::RG16_SNORM: - return PixelFormat::RG16S; - case Tegra::RenderTargetFormat::RG8_UNORM: - return PixelFormat::RG8U; - case Tegra::RenderTargetFormat::RG8_SNORM: - return PixelFormat::RG8S; - case Tegra::RenderTargetFormat::RG8_UINT: - return PixelFormat::RG8UI; - case Tegra::RenderTargetFormat::R16_FLOAT: - return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: + return PixelFormat::R32G32B32A32_FLOAT; + case Tegra::RenderTargetFormat::R32G32B32A32_SINT: + return PixelFormat::R32G32B32A32_SINT; + case Tegra::RenderTargetFormat::R32G32B32A32_UINT: + return PixelFormat::R32G32B32A32_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UNORM: + return PixelFormat::R16G16B16A16_UNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SNORM: + return PixelFormat::R16G16B16A16_SNORM; + case Tegra::RenderTargetFormat::R16G16B16A16_SINT: + return PixelFormat::R16G16B16A16_SINT; + case Tegra::RenderTargetFormat::R16G16B16A16_UINT: + return PixelFormat::R16G16B16A16_UINT; + case Tegra::RenderTargetFormat::R16G16B16A16_FLOAT: + return PixelFormat::R16G16B16A16_FLOAT; + case Tegra::RenderTargetFormat::R32G32_FLOAT: + return PixelFormat::R32G32_FLOAT; + case Tegra::RenderTargetFormat::R32G32_SINT: + return PixelFormat::R32G32_SINT; + case Tegra::RenderTargetFormat::R32G32_UINT: + return PixelFormat::R32G32_UINT; + case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT: + return PixelFormat::R16G16B16X16_FLOAT; + case Tegra::RenderTargetFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; + case Tegra::RenderTargetFormat::B8G8R8A8_SRGB: + return PixelFormat::B8G8R8A8_SRGB; + case Tegra::RenderTargetFormat::A2B10G10R10_UNORM: + return PixelFormat::A2B10G10R10_UNORM; + case Tegra::RenderTargetFormat::A2B10G10R10_UINT: + return PixelFormat::A2B10G10R10_UINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: + return PixelFormat::A8B8G8R8_SRGB; + case Tegra::RenderTargetFormat::A8B8G8R8_SNORM: + return PixelFormat::A8B8G8R8_SNORM; + case Tegra::RenderTargetFormat::A8B8G8R8_SINT: + return PixelFormat::A8B8G8R8_SINT; + case Tegra::RenderTargetFormat::A8B8G8R8_UINT: + return PixelFormat::A8B8G8R8_UINT; + case Tegra::RenderTargetFormat::R16G16_UNORM: + return PixelFormat::R16G16_UNORM; + case Tegra::RenderTargetFormat::R16G16_SNORM: + return PixelFormat::R16G16_SNORM; + case Tegra::RenderTargetFormat::R16G16_SINT: + return PixelFormat::R16G16_SINT; + case Tegra::RenderTargetFormat::R16G16_UINT: + return PixelFormat::R16G16_UINT; + case Tegra::RenderTargetFormat::R16G16_FLOAT: + return PixelFormat::R16G16_FLOAT; + case Tegra::RenderTargetFormat::B10G11R11_FLOAT: + return PixelFormat::B10G11R11_FLOAT; + case Tegra::RenderTargetFormat::R32_SINT: + return PixelFormat::R32_SINT; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32_UINT; + case Tegra::RenderTargetFormat::R32_FLOAT: + return PixelFormat::R32_FLOAT; + case Tegra::RenderTargetFormat::R5G6B5_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: + return PixelFormat::A1R5G5B5_UNORM; + case Tegra::RenderTargetFormat::R8G8_UNORM: + return PixelFormat::R8G8_UNORM; + case Tegra::RenderTargetFormat::R8G8_SNORM: + return PixelFormat::R8G8_SNORM; + case Tegra::RenderTargetFormat::R8G8_SINT: + return PixelFormat::R8G8_SINT; + case Tegra::RenderTargetFormat::R8G8_UINT: + return PixelFormat::R8G8_UINT; case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16U; + return PixelFormat::R16_UNORM; case Tegra::RenderTargetFormat::R16_SNORM: - return PixelFormat::R16S; - case Tegra::RenderTargetFormat::R16_UINT: - return PixelFormat::R16UI; + return PixelFormat::R16_SNORM; case Tegra::RenderTargetFormat::R16_SINT: - return PixelFormat::R16I; - case Tegra::RenderTargetFormat::R32_FLOAT: - return PixelFormat::R32F; - case Tegra::RenderTargetFormat::R32_SINT: - return PixelFormat::R32I; - case Tegra::RenderTargetFormat::R32_UINT: - return PixelFormat::R32UI; - case Tegra::RenderTargetFormat::RG32_UINT: - return PixelFormat::RG32UI; - case Tegra::RenderTargetFormat::RGBX16_FLOAT: - return PixelFormat::RGBX16F; + return PixelFormat::R16_SINT; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16_UINT; + case Tegra::RenderTargetFormat::R16_FLOAT: + return PixelFormat::R16_FLOAT; + case Tegra::RenderTargetFormat::R8_UNORM: + return PixelFormat::R8_UNORM; + case Tegra::RenderTargetFormat::R8_SNORM: + return PixelFormat::R8_SNORM; + case Tegra::RenderTargetFormat::R8_SINT: + return PixelFormat::R8_SINT; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8_UINT; default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - return PixelFormat::RGBA8_SRGB; + UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<int>(format)); + return PixelFormat::A8B8G8R8_UNORM; } } PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8U; - case Tegra::FramebufferConfig::PixelFormat::RGB565: - return PixelFormat::B5G6R5U; - case Tegra::FramebufferConfig::PixelFormat::BGRA8: - return PixelFormat::BGRA8; + case Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM: + return PixelFormat::A8B8G8R8_UNORM; + case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: + return PixelFormat::R5G6B5_UNORM; + case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: + return PixelFormat::B8G8R8A8_UNORM; default: UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } } @@ -212,27 +226,27 @@ SurfaceType GetFormatType(PixelFormat pixel_format) { bool IsPixelFormatASTC(PixelFormat format) { switch (format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_5X4: - case PixelFormat::ASTC_2D_5X5: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_8X5: + case PixelFormat::ASTC_2D_4X4_UNORM: + case PixelFormat::ASTC_2D_5X4_UNORM: + case PixelFormat::ASTC_2D_5X5_UNORM: + case PixelFormat::ASTC_2D_8X8_UNORM: + case PixelFormat::ASTC_2D_8X5_UNORM: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_5X4_SRGB: case PixelFormat::ASTC_2D_5X5_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: - case PixelFormat::ASTC_2D_10X8: + case PixelFormat::ASTC_2D_10X8_UNORM: case PixelFormat::ASTC_2D_10X8_SRGB: - case PixelFormat::ASTC_2D_6X6: + case PixelFormat::ASTC_2D_6X6_UNORM: case PixelFormat::ASTC_2D_6X6_SRGB: - case PixelFormat::ASTC_2D_10X10: + case PixelFormat::ASTC_2D_10X10_UNORM: case PixelFormat::ASTC_2D_10X10_SRGB: - case PixelFormat::ASTC_2D_12X12: + case PixelFormat::ASTC_2D_12X12_UNORM: case PixelFormat::ASTC_2D_12X12_SRGB: - case PixelFormat::ASTC_2D_8X6: + case PixelFormat::ASTC_2D_8X6_UNORM: case PixelFormat::ASTC_2D_8X6_SRGB: - case PixelFormat::ASTC_2D_6X5: + case PixelFormat::ASTC_2D_6X5_UNORM: case PixelFormat::ASTC_2D_6X5_SRGB: return true; default: @@ -242,12 +256,12 @@ bool IsPixelFormatASTC(PixelFormat format) { bool IsPixelFormatSRGB(PixelFormat format) { switch (format) { - case PixelFormat::RGBA8_SRGB: - case PixelFormat::BGRA8_SRGB: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: case PixelFormat::ASTC_2D_4X4_SRGB: case PixelFormat::ASTC_2D_8X8_SRGB: case PixelFormat::ASTC_2D_8X5_SRGB: @@ -269,25 +283,4 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; } -bool IsFormatBCn(PixelFormat format) { - switch (format) { - case PixelFormat::DXT1: - case PixelFormat::DXT23: - case PixelFormat::DXT45: - case PixelFormat::DXN1: - case PixelFormat::DXN2SNORM: - case PixelFormat::DXN2UNORM: - case PixelFormat::BC7U: - case PixelFormat::BC6H_UF16: - case PixelFormat::BC6H_SF16: - case PixelFormat::DXT1_SRGB: - case PixelFormat::DXT23_SRGB: - case PixelFormat::DXT45_SRGB: - case PixelFormat::BC7U_SRGB: - return true; - default: - return false; - } -} - } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 6da6a1b97..cfd12fa61 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -15,94 +15,105 @@ namespace VideoCore::Surface { enum class PixelFormat { - ABGR8U = 0, - ABGR8S = 1, - ABGR8UI = 2, - B5G6R5U = 3, - A2B10G10R10U = 4, - A1B5G5R5U = 5, - R8U = 6, - R8UI = 7, - RGBA16F = 8, - RGBA16U = 9, - RGBA16S = 10, - RGBA16UI = 11, - R11FG11FB10F = 12, - RGBA32UI = 13, - DXT1 = 14, - DXT23 = 15, - DXT45 = 16, - DXN1 = 17, // This is also known as BC4 - DXN2UNORM = 18, - DXN2SNORM = 19, - BC7U = 20, - BC6H_UF16 = 21, - BC6H_SF16 = 22, - ASTC_2D_4X4 = 23, - BGRA8 = 24, - RGBA32F = 25, - RG32F = 26, - R32F = 27, - R16F = 28, - R16U = 29, - R16S = 30, - R16UI = 31, - R16I = 32, - RG16 = 33, - RG16F = 34, - RG16UI = 35, - RG16I = 36, - RG16S = 37, - RGB32F = 38, - RGBA8_SRGB = 39, - RG8U = 40, - RG8S = 41, - RG8UI = 42, - RG32UI = 43, - RGBX16F = 44, - R32UI = 45, - R32I = 46, - ASTC_2D_8X8 = 47, - ASTC_2D_8X5 = 48, - ASTC_2D_5X4 = 49, - BGRA8_SRGB = 50, - DXT1_SRGB = 51, - DXT23_SRGB = 52, - DXT45_SRGB = 53, - BC7U_SRGB = 54, - R4G4B4A4U = 55, - ASTC_2D_4X4_SRGB = 56, - ASTC_2D_8X8_SRGB = 57, - ASTC_2D_8X5_SRGB = 58, - ASTC_2D_5X4_SRGB = 59, - ASTC_2D_5X5 = 60, - ASTC_2D_5X5_SRGB = 61, - ASTC_2D_10X8 = 62, - ASTC_2D_10X8_SRGB = 63, - ASTC_2D_6X6 = 64, - ASTC_2D_6X6_SRGB = 65, - ASTC_2D_10X10 = 66, - ASTC_2D_10X10_SRGB = 67, - ASTC_2D_12X12 = 68, - ASTC_2D_12X12_SRGB = 69, - ASTC_2D_8X6 = 70, - ASTC_2D_8X6_SRGB = 71, - ASTC_2D_6X5 = 72, - ASTC_2D_6X5_SRGB = 73, - E5B9G9R9F = 74, + A8B8G8R8_UNORM, + A8B8G8R8_SNORM, + A8B8G8R8_SINT, + A8B8G8R8_UINT, + R5G6B5_UNORM, + B5G6R5_UNORM, + A1R5G5B5_UNORM, + A2B10G10R10_UNORM, + A2B10G10R10_UINT, + A1B5G5R5_UNORM, + R8_UNORM, + R8_SNORM, + R8_SINT, + R8_UINT, + R16G16B16A16_FLOAT, + R16G16B16A16_UNORM, + R16G16B16A16_SNORM, + R16G16B16A16_SINT, + R16G16B16A16_UINT, + B10G11R11_FLOAT, + R32G32B32A32_UINT, + BC1_RGBA_UNORM, + BC2_UNORM, + BC3_UNORM, + BC4_UNORM, + BC4_SNORM, + BC5_UNORM, + BC5_SNORM, + BC7_UNORM, + BC6H_UFLOAT, + BC6H_SFLOAT, + ASTC_2D_4X4_UNORM, + B8G8R8A8_UNORM, + R32G32B32A32_FLOAT, + R32G32B32A32_SINT, + R32G32_FLOAT, + R32G32_SINT, + R32_FLOAT, + R16_FLOAT, + R16_UNORM, + R16_SNORM, + R16_UINT, + R16_SINT, + R16G16_UNORM, + R16G16_FLOAT, + R16G16_UINT, + R16G16_SINT, + R16G16_SNORM, + R32G32B32_FLOAT, + A8B8G8R8_SRGB, + R8G8_UNORM, + R8G8_SNORM, + R8G8_SINT, + R8G8_UINT, + R32G32_UINT, + R16G16B16X16_FLOAT, + R32_UINT, + R32_SINT, + ASTC_2D_8X8_UNORM, + ASTC_2D_8X5_UNORM, + ASTC_2D_5X4_UNORM, + B8G8R8A8_SRGB, + BC1_RGBA_SRGB, + BC2_SRGB, + BC3_SRGB, + BC7_SRGB, + A4B4G4R4_UNORM, + ASTC_2D_4X4_SRGB, + ASTC_2D_8X8_SRGB, + ASTC_2D_8X5_SRGB, + ASTC_2D_5X4_SRGB, + ASTC_2D_5X5_UNORM, + ASTC_2D_5X5_SRGB, + ASTC_2D_10X8_UNORM, + ASTC_2D_10X8_SRGB, + ASTC_2D_6X6_UNORM, + ASTC_2D_6X6_SRGB, + ASTC_2D_10X10_UNORM, + ASTC_2D_10X10_SRGB, + ASTC_2D_12X12_UNORM, + ASTC_2D_12X12_SRGB, + ASTC_2D_8X6_UNORM, + ASTC_2D_8X6_SRGB, + ASTC_2D_6X5_UNORM, + ASTC_2D_6X5_SRGB, + E5B9G9R9_FLOAT, MaxColorFormat, // Depth formats - Z32F = 75, - Z16 = 76, + D32_FLOAT = MaxColorFormat, + D16_UNORM, MaxDepthFormat, // DepthStencil formats - Z24S8 = 77, - S8Z24 = 78, - Z32FS8 = 79, + D24_UNORM_S8_UINT = MaxDepthFormat, + S8_UINT_D24_UNORM, + D32_FLOAT_S8_UINT, MaxDepthStencilFormat, @@ -130,86 +141,97 @@ enum class SurfaceTarget { }; constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ - 0, // ABGR8U - 0, // ABGR8S - 0, // ABGR8UI - 0, // B5G6R5U - 0, // A2B10G10R10U - 0, // A1B5G5R5U - 0, // R8U - 0, // R8UI - 0, // RGBA16F - 0, // RGBA16U - 0, // RGBA16S - 0, // RGBA16UI - 0, // R11FG11FB10F - 0, // RGBA32UI - 2, // DXT1 - 2, // DXT23 - 2, // DXT45 - 2, // DXN1 - 2, // DXN2UNORM - 2, // DXN2SNORM - 2, // BC7U - 2, // BC6H_UF16 - 2, // BC6H_SF16 - 2, // ASTC_2D_4X4 - 0, // BGRA8 - 0, // RGBA32F - 0, // RG32F - 0, // R32F - 0, // R16F - 0, // R16U - 0, // R16S - 0, // R16UI - 0, // R16I - 0, // RG16 - 0, // RG16F - 0, // RG16UI - 0, // RG16I - 0, // RG16S - 0, // RGB32F - 0, // RGBA8_SRGB - 0, // RG8U - 0, // RG8S - 0, // RG8UI - 0, // RG32UI - 0, // RGBX16F - 0, // R32UI - 0, // R32I - 2, // ASTC_2D_8X8 - 2, // ASTC_2D_8X5 - 2, // ASTC_2D_5X4 - 0, // BGRA8_SRGB - 2, // DXT1_SRGB - 2, // DXT23_SRGB - 2, // DXT45_SRGB - 2, // BC7U_SRGB - 0, // R4G4B4A4U + 0, // A8B8G8R8_UNORM + 0, // A8B8G8R8_SNORM + 0, // A8B8G8R8_SINT + 0, // A8B8G8R8_UINT + 0, // R5G6B5_UNORM + 0, // B5G6R5_UNORM + 0, // A1R5G5B5_UNORM + 0, // A2B10G10R10_UNORM + 0, // A2B10G10R10_UINT + 0, // A1B5G5R5_UNORM + 0, // R8_UNORM + 0, // R8_SNORM + 0, // R8_SINT + 0, // R8_UINT + 0, // R16G16B16A16_FLOAT + 0, // R16G16B16A16_UNORM + 0, // R16G16B16A16_SNORM + 0, // R16G16B16A16_SINT + 0, // R16G16B16A16_UINT + 0, // B10G11R11_FLOAT + 0, // R32G32B32A32_UINT + 2, // BC1_RGBA_UNORM + 2, // BC2_UNORM + 2, // BC3_UNORM + 2, // BC4_UNORM + 2, // BC4_SNORM + 2, // BC5_UNORM + 2, // BC5_SNORM + 2, // BC7_UNORM + 2, // BC6H_UFLOAT + 2, // BC6H_SFLOAT + 2, // ASTC_2D_4X4_UNORM + 0, // B8G8R8A8_UNORM + 0, // R32G32B32A32_FLOAT + 0, // R32G32B32A32_SINT + 0, // R32G32_FLOAT + 0, // R32G32_SINT + 0, // R32_FLOAT + 0, // R16_FLOAT + 0, // R16_UNORM + 0, // R16_SNORM + 0, // R16_UINT + 0, // R16_SINT + 0, // R16G16_UNORM + 0, // R16G16_FLOAT + 0, // R16G16_UINT + 0, // R16G16_SINT + 0, // R16G16_SNORM + 0, // R32G32B32_FLOAT + 0, // A8B8G8R8_SRGB + 0, // R8G8_UNORM + 0, // R8G8_SNORM + 0, // R8G8_SINT + 0, // R8G8_UINT + 0, // R32G32_UINT + 0, // R16G16B16X16_FLOAT + 0, // R32_UINT + 0, // R32_SINT + 2, // ASTC_2D_8X8_UNORM + 2, // ASTC_2D_8X5_UNORM + 2, // ASTC_2D_5X4_UNORM + 0, // B8G8R8A8_SRGB + 2, // BC1_RGBA_SRGB + 2, // BC2_SRGB + 2, // BC3_SRGB + 2, // BC7_SRGB + 0, // A4B4G4R4_UNORM 2, // ASTC_2D_4X4_SRGB 2, // ASTC_2D_8X8_SRGB 2, // ASTC_2D_8X5_SRGB 2, // ASTC_2D_5X4_SRGB - 2, // ASTC_2D_5X5 + 2, // ASTC_2D_5X5_UNORM 2, // ASTC_2D_5X5_SRGB - 2, // ASTC_2D_10X8 + 2, // ASTC_2D_10X8_UNORM 2, // ASTC_2D_10X8_SRGB - 2, // ASTC_2D_6X6 + 2, // ASTC_2D_6X6_UNORM 2, // ASTC_2D_6X6_SRGB - 2, // ASTC_2D_10X10 + 2, // ASTC_2D_10X10_UNORM 2, // ASTC_2D_10X10_SRGB - 2, // ASTC_2D_12X12 + 2, // ASTC_2D_12X12_UNORM 2, // ASTC_2D_12X12_SRGB - 2, // ASTC_2D_8X6 + 2, // ASTC_2D_8X6_UNORM 2, // ASTC_2D_8X6_SRGB - 2, // ASTC_2D_6X5 + 2, // ASTC_2D_6X5_UNORM 2, // ASTC_2D_6X5_SRGB - 0, // E5B9G9R9F - 0, // Z32F - 0, // Z16 - 0, // Z24S8 - 0, // S8Z24 - 0, // Z32FS8 + 0, // E5B9G9R9_FLOAT + 0, // D32_FLOAT + 0, // D16_UNORM + 0, // D24_UNORM_S8_UINT + 0, // S8_UINT_D24_UNORM + 0, // D32_FLOAT_S8_UINT }}; /** @@ -229,86 +251,97 @@ inline constexpr u32 GetCompressionFactor(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 8, // ASTC_2D_8X5 - 5, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 8, // ASTC_2D_8X5_UNORM + 5, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 8, // ASTC_2D_8X5_SRGB 5, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 10, // ASTC_2D_10X8 + 10, // ASTC_2D_10X8_UNORM 10, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 8, // ASTC_2D_8X6 + 8, // ASTC_2D_8X6_UNORM 8, // ASTC_2D_8X6_SRGB - 6, // ASTC_2D_6X5 + 6, // ASTC_2D_6X5_UNORM 6, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { @@ -320,86 +353,97 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16S - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG8UI - 1, // RG32UI - 1, // RGBX16F - 1, // R32UI - 1, // R32I - 8, // ASTC_2D_8X8 - 5, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 1, // R4G4B4A4U + 1, // A8B8G8R8_UNORM + 1, // A8B8G8R8_SNORM + 1, // A8B8G8R8_SINT + 1, // A8B8G8R8_UINT + 1, // R5G6B5_UNORM + 1, // B5G6R5_UNORM + 1, // A1R5G5B5_UNORM + 1, // A2B10G10R10_UNORM + 1, // A2B10G10R10_UINT + 1, // A1B5G5R5_UNORM + 1, // R8_UNORM + 1, // R8_SNORM + 1, // R8_SINT + 1, // R8_UINT + 1, // R16G16B16A16_FLOAT + 1, // R16G16B16A16_UNORM + 1, // R16G16B16A16_SNORM + 1, // R16G16B16A16_SINT + 1, // R16G16B16A16_UINT + 1, // B10G11R11_FLOAT + 1, // R32G32B32A32_UINT + 4, // BC1_RGBA_UNORM + 4, // BC2_UNORM + 4, // BC3_UNORM + 4, // BC4_UNORM + 4, // BC4_SNORM + 4, // BC5_UNORM + 4, // BC5_SNORM + 4, // BC7_UNORM + 4, // BC6H_UFLOAT + 4, // BC6H_SFLOAT + 4, // ASTC_2D_4X4_UNORM + 1, // B8G8R8A8_UNORM + 1, // R32G32B32A32_FLOAT + 1, // R32G32B32A32_SINT + 1, // R32G32_FLOAT + 1, // R32G32_SINT + 1, // R32_FLOAT + 1, // R16_FLOAT + 1, // R16_UNORM + 1, // R16_SNORM + 1, // R16_UINT + 1, // R16_SINT + 1, // R16G16_UNORM + 1, // R16G16_FLOAT + 1, // R16G16_UINT + 1, // R16G16_SINT + 1, // R16G16_SNORM + 1, // R32G32B32_FLOAT + 1, // A8B8G8R8_SRGB + 1, // R8G8_UNORM + 1, // R8G8_SNORM + 1, // R8G8_SINT + 1, // R8G8_UINT + 1, // R32G32_UINT + 1, // R16G16B16X16_FLOAT + 1, // R32_UINT + 1, // R32_SINT + 8, // ASTC_2D_8X8_UNORM + 5, // ASTC_2D_8X5_UNORM + 4, // ASTC_2D_5X4_UNORM + 1, // B8G8R8A8_SRGB + 4, // BC1_RGBA_SRGB + 4, // BC2_SRGB + 4, // BC3_SRGB + 4, // BC7_SRGB + 1, // A4B4G4R4_UNORM 4, // ASTC_2D_4X4_SRGB 8, // ASTC_2D_8X8_SRGB 5, // ASTC_2D_8X5_SRGB 4, // ASTC_2D_5X4_SRGB - 5, // ASTC_2D_5X5 + 5, // ASTC_2D_5X5_UNORM 5, // ASTC_2D_5X5_SRGB - 8, // ASTC_2D_10X8 + 8, // ASTC_2D_10X8_UNORM 8, // ASTC_2D_10X8_SRGB - 6, // ASTC_2D_6X6 + 6, // ASTC_2D_6X6_UNORM 6, // ASTC_2D_6X6_SRGB - 10, // ASTC_2D_10X10 + 10, // ASTC_2D_10X10_UNORM 10, // ASTC_2D_10X10_SRGB - 12, // ASTC_2D_12X12 + 12, // ASTC_2D_12X12_UNORM 12, // ASTC_2D_12X12_SRGB - 6, // ASTC_2D_8X6 + 6, // ASTC_2D_8X6_UNORM 6, // ASTC_2D_8X6_SRGB - 5, // ASTC_2D_6X5 + 5, // ASTC_2D_6X5_UNORM 5, // ASTC_2D_6X5_SRGB - 1, // E5B9G9R9F - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 + 1, // E5B9G9R9_FLOAT + 1, // D32_FLOAT + 1, // D16_UNORM + 1, // D24_UNORM_S8_UINT + 1, // S8_UINT_D24_UNORM + 1, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { @@ -411,86 +455,97 @@ static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { } constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8U - 32, // ABGR8S - 32, // ABGR8UI - 16, // B5G6R5U - 32, // A2B10G10R10U - 16, // A1B5G5R5U - 8, // R8U - 8, // R8UI - 64, // RGBA16F - 64, // RGBA16U - 64, // RGBA16S - 64, // RGBA16UI - 32, // R11FG11FB10F - 128, // RGBA32UI - 64, // DXT1 - 128, // DXT23 - 128, // DXT45 - 64, // DXN1 - 128, // DXN2UNORM - 128, // DXN2SNORM - 128, // BC7U - 128, // BC6H_UF16 - 128, // BC6H_SF16 - 128, // ASTC_2D_4X4 - 32, // BGRA8 - 128, // RGBA32F - 64, // RG32F - 32, // R32F - 16, // R16F - 16, // R16U - 16, // R16S - 16, // R16UI - 16, // R16I - 32, // RG16 - 32, // RG16F - 32, // RG16UI - 32, // RG16I - 32, // RG16S - 96, // RGB32F - 32, // RGBA8_SRGB - 16, // RG8U - 16, // RG8S - 16, // RG8UI - 64, // RG32UI - 64, // RGBX16F - 32, // R32UI - 32, // R32I - 128, // ASTC_2D_8X8 - 128, // ASTC_2D_8X5 - 128, // ASTC_2D_5X4 - 32, // BGRA8_SRGB - 64, // DXT1_SRGB - 128, // DXT23_SRGB - 128, // DXT45_SRGB - 128, // BC7U - 16, // R4G4B4A4U + 32, // A8B8G8R8_UNORM + 32, // A8B8G8R8_SNORM + 32, // A8B8G8R8_SINT + 32, // A8B8G8R8_UINT + 16, // R5G6B5_UNORM + 16, // B5G6R5_UNORM + 16, // A1R5G5B5_UNORM + 32, // A2B10G10R10_UNORM + 32, // A2B10G10R10_UINT + 16, // A1B5G5R5_UNORM + 8, // R8_UNORM + 8, // R8_SNORM + 8, // R8_SINT + 8, // R8_UINT + 64, // R16G16B16A16_FLOAT + 64, // R16G16B16A16_UNORM + 64, // R16G16B16A16_SNORM + 64, // R16G16B16A16_SINT + 64, // R16G16B16A16_UINT + 32, // B10G11R11_FLOAT + 128, // R32G32B32A32_UINT + 64, // BC1_RGBA_UNORM + 128, // BC2_UNORM + 128, // BC3_UNORM + 64, // BC4_UNORM + 64, // BC4_SNORM + 128, // BC5_UNORM + 128, // BC5_SNORM + 128, // BC7_UNORM + 128, // BC6H_UFLOAT + 128, // BC6H_SFLOAT + 128, // ASTC_2D_4X4_UNORM + 32, // B8G8R8A8_UNORM + 128, // R32G32B32A32_FLOAT + 128, // R32G32B32A32_SINT + 64, // R32G32_FLOAT + 64, // R32G32_SINT + 32, // R32_FLOAT + 16, // R16_FLOAT + 16, // R16_UNORM + 16, // R16_SNORM + 16, // R16_UINT + 16, // R16_SINT + 32, // R16G16_UNORM + 32, // R16G16_FLOAT + 32, // R16G16_UINT + 32, // R16G16_SINT + 32, // R16G16_SNORM + 96, // R32G32B32_FLOAT + 32, // A8B8G8R8_SRGB + 16, // R8G8_UNORM + 16, // R8G8_SNORM + 16, // R8G8_SINT + 16, // R8G8_UINT + 64, // R32G32_UINT + 64, // R16G16B16X16_FLOAT + 32, // R32_UINT + 32, // R32_SINT + 128, // ASTC_2D_8X8_UNORM + 128, // ASTC_2D_8X5_UNORM + 128, // ASTC_2D_5X4_UNORM + 32, // B8G8R8A8_SRGB + 64, // BC1_RGBA_SRGB + 128, // BC2_SRGB + 128, // BC3_SRGB + 128, // BC7_UNORM + 16, // A4B4G4R4_UNORM 128, // ASTC_2D_4X4_SRGB 128, // ASTC_2D_8X8_SRGB 128, // ASTC_2D_8X5_SRGB 128, // ASTC_2D_5X4_SRGB - 128, // ASTC_2D_5X5 + 128, // ASTC_2D_5X5_UNORM 128, // ASTC_2D_5X5_SRGB - 128, // ASTC_2D_10X8 + 128, // ASTC_2D_10X8_UNORM 128, // ASTC_2D_10X8_SRGB - 128, // ASTC_2D_6X6 + 128, // ASTC_2D_6X6_UNORM 128, // ASTC_2D_6X6_SRGB - 128, // ASTC_2D_10X10 + 128, // ASTC_2D_10X10_UNORM 128, // ASTC_2D_10X10_SRGB - 128, // ASTC_2D_12X12 + 128, // ASTC_2D_12X12_UNORM 128, // ASTC_2D_12X12_SRGB - 128, // ASTC_2D_8X6 + 128, // ASTC_2D_8X6_UNORM 128, // ASTC_2D_8X6_SRGB - 128, // ASTC_2D_6X5 + 128, // ASTC_2D_6X5_UNORM 128, // ASTC_2D_6X5_SRGB - 32, // E5B9G9R9F - 32, // Z32F - 16, // Z16 - 32, // Z24S8 - 32, // S8Z24 - 64, // Z32FS8 + 32, // E5B9G9R9_FLOAT + 32, // D32_FLOAT + 16, // D16_UNORM + 32, // D24_UNORM_S8_UINT + 32, // S8_UINT_D24_UNORM + 64, // D32_FLOAT_S8_UINT }}; static constexpr u32 GetFormatBpp(PixelFormat format) { @@ -529,7 +584,4 @@ bool IsPixelFormatSRGB(PixelFormat format); std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); -/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN -bool IsFormatBCn(PixelFormat format); - } // namespace VideoCore::Surface diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index f476f03b0..a1cc4756d 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,119 +41,126 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 78> DefinitionTable = {{ - {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, - {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, - {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, - {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB}, +constexpr std::array<Table, 86> DefinitionTable = {{ + {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, + {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, + {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, + {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, + {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, - {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U}, + {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, - {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U}, + {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, + {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, - {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U}, + {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, - {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U}, + {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, - {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U}, - {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI}, + {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, + {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, + {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, + {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, - {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, - {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, - {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI}, + {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, + {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, + {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, + {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, - {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, - {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, - {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, - {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, + {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, + {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, + {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, + {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, + {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, - {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F}, - {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16}, - {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S}, - {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI}, - {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I}, + {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, + {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, + {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, + {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, + {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, - {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F}, - {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U}, - {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S}, - {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI}, - {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I}, + {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, + {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, + {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, + {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, + {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, - {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F}, + {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, - {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F}, - {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI}, + {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, + {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, + {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, - {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F}, + {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, - {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F}, - {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI}, + {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, + {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, + {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, - {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, - {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, - {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, + {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, + {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, + {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, - {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, + {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, - {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, - {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, - {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, + {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, + {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, + {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, + {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, - {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, - {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, + {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, + {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, - {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23}, - {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB}, + {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, + {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, - {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45}, - {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB}, + {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, + {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, - // TODO: Use a different pixel format for SNORM - {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1}, - {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1}, + {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, + {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, - {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM}, - {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM}, + {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, + {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, - {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U}, - {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB}, + {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, + {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, - {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16}, - {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16}, + {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, + {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, - {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4}, + {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, - {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4}, + {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, - {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5}, + {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, - {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8}, + {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, - {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5}, + {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, - {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8}, + {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, - {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6}, + {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, - {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10}, + {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, - {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12}, + {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, - {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6}, + {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, - {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5}, + {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, }}; @@ -184,7 +191,7 @@ PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb static_cast<int>(format), is_srgb, static_cast<int>(red_component), static_cast<int>(green_component), static_cast<int>(blue_component), static_cast<int>(alpha_component)); - return PixelFormat::ABGR8U; + return PixelFormat::A8B8G8R8_UNORM; } void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0caf3b4f0..dfcf36e0b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -228,7 +228,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { + if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) { return; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 921562c1f..9e5fe2374 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -83,12 +83,12 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta params.type = GetFormatType(params.pixel_format); if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { - case PixelFormat::R16U: - case PixelFormat::R16F: - params.pixel_format = PixelFormat::Z16; + case PixelFormat::R16_UNORM: + case PixelFormat::R16_FLOAT: + params.pixel_format = PixelFormat::D16_UNORM; break; - case PixelFormat::R32F: - params.pixel_format = PixelFormat::Z32F; + case PixelFormat::R32_FLOAT: + params.pixel_format = PixelFormat::D32_FLOAT; break; default: UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", @@ -195,8 +195,8 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; params.block_width = config.memory_layout.block_width; params.block_height = config.memory_layout.block_height; params.block_depth = config.memory_layout.block_depth; @@ -235,8 +235,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params{}; params.is_tiled = !config.linear; - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || + config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cdcddb225..96c4e4cc2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -373,9 +373,9 @@ protected: siblings_table[static_cast<std::size_t>(b)] = a; }; std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); - make_siblings(PixelFormat::Z16, PixelFormat::R16U); - make_siblings(PixelFormat::Z32F, PixelFormat::R32F); - make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); + make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); + make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); sampled_textures.reserve(64); } @@ -1031,7 +1031,7 @@ private: params.pitch = 4; params.num_levels = 1; params.emulated_levels = 1; - params.pixel_format = VideoCore::Surface::PixelFormat::R8U; + params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; params.type = VideoCore::Surface::SurfaceType::ColorTexture; auto surface = CreateSurface(0ULL, params); invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index f3efa7eb0..962921483 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -35,7 +35,7 @@ void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { S8Z24 s8z24_pixel{}; Z24S8 z24s8_pixel{}; constexpr auto bpp{ - VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; + VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; for (std::size_t y = 0; y < height; ++y) { for (std::size_t x = 0; x < width; ++x) { const std::size_t offset{bpp * (y * width + x)}; @@ -73,7 +73,7 @@ void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, in_data, width, height, depth, block_width, block_height); std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -85,7 +85,7 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h static_cast<u32>(pixel_format)); UNREACHABLE(); - } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { + } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); } } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 98beabef1..474ae620a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -184,53 +184,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, } } -u32 BytesPerPixel(TextureFormat format) { - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXN1: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 8; - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - // In this case a 'pixel' actually refers to a 4x4 tile. - return 16; - case TextureFormat::R32_G32_B32: - return 12; - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_5X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_8X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32: - case TextureFormat::R16_G16: - return 4; - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::G8R8: - case TextureFormat::R16: - return 2; - case TextureFormat::R8: - return 1; - case TextureFormat::R16_G16_B16_A16: - return 8; - case TextureFormat::R32_G32_B32_A32: - return 16; - case TextureFormat::R32_G32: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - return 1; - } -} - void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 width_spacing) { @@ -348,48 +301,6 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 } } -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height) { - std::vector<u8> rgba_data; - - // TODO(Subv): Implement. - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - case TextureFormat::BC6H_UF16: - case TextureFormat::BC6H_SF16: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::ASTC_2D_8X8: - case TextureFormat::ASTC_2D_5X5: - case TextureFormat::ASTC_2D_10X8: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::BF10GF11RF11: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::R32_G32_B32: - // TODO(Subv): For the time being just forward the same data without any decoding. - rgba_data = texture_data; - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return rgba_data; -} - std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 232b696b3..d6fe35d37 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -38,10 +38,6 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); -/// Decodes an unswizzled texture into a A8R8G8B8 texture. -std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, - u32 height); - /// This function calculates the correct size of a texture depending if it's tiled or not. std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index eba05aced..0574fef12 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -12,10 +12,10 @@ namespace Tegra::Texture { enum class TextureFormat : u32 { - R32_G32_B32_A32 = 0x01, - R32_G32_B32 = 0x02, - R16_G16_B16_A16 = 0x03, - R32_G32 = 0x04, + R32G32B32A32 = 0x01, + R32G32B32 = 0x02, + R16G16B16A16 = 0x03, + R32G32 = 0x04, R32_B24G8 = 0x05, ETC2_RGB = 0x06, X8B8G8R8 = 0x07, @@ -23,19 +23,19 @@ enum class TextureFormat : u32 { A2B10G10R10 = 0x09, ETC2_RGB_PTA = 0x0a, ETC2_RGBA = 0x0b, - R16_G16 = 0x0c, - G8R24 = 0x0d, - G24R8 = 0x0e, + R16G16 = 0x0c, + R24G8 = 0x0d, + R8G24 = 0x0e, R32 = 0x0f, - BC6H_SF16 = 0x10, - BC6H_UF16 = 0x11, + BC6H_SFLOAT = 0x10, + BC6H_UFLOAT = 0x11, A4B4G4R4 = 0x12, A5B5G5R1 = 0x13, A1B5G5R5 = 0x14, B5G6R5 = 0x15, B6G5R5 = 0x16, - BC7U = 0x17, - G8R8 = 0x18, + BC7 = 0x17, + R8G8 = 0x18, EAC = 0x19, EACX2 = 0x1a, R16 = 0x1b, @@ -43,23 +43,23 @@ enum class TextureFormat : u32 { R8 = 0x1d, G4R4 = 0x1e, R1 = 0x1f, - E5B9G9R9_SHAREDEXP = 0x20, - BF10GF11RF11 = 0x21, + E5B9G9R9 = 0x20, + B10G11R11 = 0x21, G8B8G8R8 = 0x22, B8G8R8G8 = 0x23, - DXT1 = 0x24, - DXT23 = 0x25, - DXT45 = 0x26, - DXN1 = 0x27, - DXN2 = 0x28, - S8Z24 = 0x29, + BC1_RGBA = 0x24, + BC2 = 0x25, + BC3 = 0x26, + BC4 = 0x27, + BC5 = 0x28, + S8D24 = 0x29, X8Z24 = 0x2a, - Z24S8 = 0x2b, + D24S8 = 0x2b, X4V4Z24__COV4R4V = 0x2c, X4V4Z24__COV8R8V = 0x2d, V8Z24__COV4R12V = 0x2e, - ZF32 = 0x2f, - ZF32_X24S8 = 0x30, + D32 = 0x2f, + D32S8 = 0x30, X8Z24_X20V4S8__COV4R4V = 0x31, X8Z24_X20V4S8__COV8R8V = 0x32, ZF32_X20V4X8__COV4R4V = 0x33, @@ -69,7 +69,7 @@ enum class TextureFormat : u32 { X8Z24_X16V8S8__COV4R12V = 0x37, ZF32_X16V8X8__COV4R12V = 0x38, ZF32_X16V8S8__COV4R12V = 0x39, - Z16 = 0x3a, + D16 = 0x3a, V8Z24__COV8R24V = 0x3b, X8Z24_X16V8S8__COV8R24V = 0x3c, ZF32_X16V8X8__COV8R24V = 0x3d, @@ -375,7 +375,4 @@ struct FullTextureInfo { TSCEntry tsc; }; -/// Returns the number of bytes per pixel of the input texture format. -u32 BytesPerPixel(TextureFormat format); - } // namespace Tegra::Texture diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index a862b2610..656096c9f 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -133,11 +133,44 @@ file(GLOB COMPAT_LIST file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*) file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*) +if (ENABLE_QT_TRANSLATION) + set(YUZU_QT_LANGUAGES "${PROJECT_SOURCE_DIR}/dist/languages" CACHE PATH "Path to the translation bundle for the Qt frontend") + option(GENERATE_QT_TRANSLATION "Generate en.ts as the translation source file" OFF) + + # Update source TS file if enabled + if (GENERATE_QT_TRANSLATION) + get_target_property(SRCS yuzu SOURCES) + qt5_create_translation(QM_FILES ${SRCS} ${UIS} ${YUZU_QT_LANGUAGES}/en.ts) + add_custom_target(translation ALL DEPENDS ${YUZU_QT_LANGUAGES}/en.ts) + endif() + + # Find all TS files except en.ts + file(GLOB_RECURSE LANGUAGES_TS ${YUZU_QT_LANGUAGES}/*.ts) + list(REMOVE_ITEM LANGUAGES_TS ${YUZU_QT_LANGUAGES}/en.ts) + + # Compile TS files to QM files + qt5_add_translation(LANGUAGES_QM ${LANGUAGES_TS}) + + # Build a QRC file from the QM file list + set(LANGUAGES_QRC ${CMAKE_CURRENT_BINARY_DIR}/languages.qrc) + file(WRITE ${LANGUAGES_QRC} "<RCC><qresource prefix=\"languages\">\n") + foreach (QM ${LANGUAGES_QM}) + get_filename_component(QM_FILE ${QM} NAME) + file(APPEND ${LANGUAGES_QRC} "<file>${QM_FILE}</file>\n") + endforeach (QM) + file(APPEND ${LANGUAGES_QRC} "</qresource></RCC>") + + # Add the QRC file to package in all QM files + qt5_add_resources(LANGUAGES ${LANGUAGES_QRC}) +else() + set(LANGUAGES) +endif() target_sources(yuzu PRIVATE ${COMPAT_LIST} ${ICONS} + ${LANGUAGES} ${THEMES} ) diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index d25b99a32..59a193edd 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -611,6 +611,7 @@ void Config::ReadPathValues() { } } UISettings::values.recent_files = ReadSetting(QStringLiteral("recentFiles")).toStringList(); + UISettings::values.language = ReadSetting(QStringLiteral("language"), QString{}).toString(); qt_config->endGroup(); } @@ -661,6 +662,8 @@ void Config::ReadRendererValues() { ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), false); + ReadSettingGlobal(Settings::values.use_asynchronous_shaders, + QStringLiteral("use_asynchronous_shaders"), false); ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), true); ReadSettingGlobal(Settings::values.force_30fps_mode, QStringLiteral("force_30fps_mode"), false); @@ -1093,6 +1096,7 @@ void Config::SavePathValues() { } qt_config->endArray(); WriteSetting(QStringLiteral("recentFiles"), UISettings::values.recent_files); + WriteSetting(QStringLiteral("language"), UISettings::values.language, QString{}); qt_config->endGroup(); } @@ -1145,6 +1149,8 @@ void Config::SaveRendererValues() { WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, false); + WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), + Settings::values.use_asynchronous_shaders, false); WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); WriteSettingGlobal(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index a5afb354f..4e30dc51e 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -23,6 +23,7 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry) SetConfiguration(); PopulateSelectionList(); + connect(ui->uiTab, &ConfigureUi::LanguageChanged, this, &ConfigureDialog::OnLanguageChanged); connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, &ConfigureDialog::UpdateVisibleTabs); @@ -98,6 +99,14 @@ void ConfigureDialog::PopulateSelectionList() { } } +void ConfigureDialog::OnLanguageChanged(const QString& locale) { + emit LanguageChanged(locale); + // first apply the configuration, and then restore the display + ApplyConfiguration(); + RetranslateUI(); + SetConfiguration(); +} + void ConfigureDialog::UpdateVisibleTabs() { const auto items = ui->selectorList->selectedItems(); if (items.isEmpty()) { diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h index 2d3bfc2da..4289bc225 100644 --- a/src/yuzu/configuration/configure_dialog.h +++ b/src/yuzu/configuration/configure_dialog.h @@ -22,6 +22,12 @@ public: void ApplyConfiguration(); +private slots: + void OnLanguageChanged(const QString& locale); + +signals: + void LanguageChanged(const QString& locale); + private: void changeEvent(QEvent* event) override; diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 7c0fa7ec5..ce30188cd 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -24,6 +24,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); ui->use_assembly_shaders->setEnabled(runtime_lock); + ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->force_30fps_mode->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); @@ -32,6 +33,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { static_cast<int>(Settings::values.gpu_accuracy.GetValue())); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); + ui->use_asynchronous_shaders->setChecked( + Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode.GetValue()); ui->anisotropic_filtering_combobox->setCurrentIndex( @@ -41,6 +44,10 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ConfigurationShared::SetPerGameSetting(ui->use_vsync, &Settings::values.use_vsync); ConfigurationShared::SetPerGameSetting(ui->use_assembly_shaders, &Settings::values.use_assembly_shaders); + ConfigurationShared::SetPerGameSetting(ui->use_asynchronous_shaders, + &Settings::values.use_asynchronous_shaders); + ConfigurationShared::SetPerGameSetting(ui->use_asynchronous_shaders, + &Settings::values.use_asynchronous_shaders); ConfigurationShared::SetPerGameSetting(ui->use_fast_gpu_time, &Settings::values.use_fast_gpu_time); ConfigurationShared::SetPerGameSetting(ui->force_30fps_mode, @@ -67,6 +74,14 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { if (Settings::values.use_assembly_shaders.UsingGlobal()) { Settings::values.use_assembly_shaders.SetValue(ui->use_assembly_shaders->isChecked()); } + if (Settings::values.use_asynchronous_shaders.UsingGlobal()) { + Settings::values.use_asynchronous_shaders.SetValue( + ui->use_asynchronous_shaders->isChecked()); + } + if (Settings::values.use_asynchronous_shaders.UsingGlobal()) { + Settings::values.use_asynchronous_shaders.SetValue( + ui->use_asynchronous_shaders->isChecked()); + } if (Settings::values.use_fast_gpu_time.UsingGlobal()) { Settings::values.use_fast_gpu_time.SetValue(ui->use_fast_gpu_time->isChecked()); } @@ -83,6 +98,10 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, ui->use_assembly_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, + ui->use_asynchronous_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, + ui->use_asynchronous_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time); ConfigurationShared::ApplyPerGameSetting(&Settings::values.force_30fps_mode, @@ -117,6 +136,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); + ui->use_asynchronous_shaders->setEnabled( + Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); ui->force_30fps_mode->setEnabled(Settings::values.force_30fps_mode.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( @@ -128,6 +149,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::InsertGlobalItem(ui->gpu_accuracy); ui->use_vsync->setTristate(true); ui->use_assembly_shaders->setTristate(true); + ui->use_asynchronous_shaders->setTristate(true); ui->use_fast_gpu_time->setTristate(true); ui->force_30fps_mode->setTristate(true); ConfigurationShared::InsertGlobalItem(ui->anisotropic_filtering_combobox); diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 0021607ac..71e7dfe5e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -73,6 +73,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_asynchronous_shaders"> + <property name="toolTip"> + <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> + </property> + <property name="text"> + <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="force_30fps_mode"> <property name="text"> <string>Force 30 FPS mode</string> diff --git a/src/yuzu/configuration/configure_ui.cpp b/src/yuzu/configuration/configure_ui.cpp index 94424ee44..24b6c5b72 100644 --- a/src/yuzu/configuration/configure_ui.cpp +++ b/src/yuzu/configuration/configure_ui.cpp @@ -5,6 +5,7 @@ #include <array> #include <utility> +#include <QDirIterator> #include "common/common_types.h" #include "core/settings.h" #include "ui_configure_ui.h" @@ -29,6 +30,8 @@ constexpr std::array row_text_names{ ConfigureUi::ConfigureUi(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureUi) { ui->setupUi(this); + InitializeLanguageComboBox(); + for (const auto& theme : UISettings::themes) { ui->theme_combobox->addItem(QString::fromUtf8(theme.first), QString::fromUtf8(theme.second)); @@ -72,6 +75,8 @@ void ConfigureUi::RequestGameListUpdate() { void ConfigureUi::SetConfiguration() { ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme)); + ui->language_combobox->setCurrentIndex( + ui->language_combobox->findData(UISettings::values.language)); ui->show_add_ons->setChecked(UISettings::values.show_add_ons); ui->icon_size_combobox->setCurrentIndex( ui->icon_size_combobox->findData(UISettings::values.icon_size)); @@ -100,6 +105,25 @@ void ConfigureUi::RetranslateUI() { } } +void ConfigureUi::InitializeLanguageComboBox() { + ui->language_combobox->addItem(tr("<System>"), QString{}); + ui->language_combobox->addItem(tr("English"), QStringLiteral("en")); + QDirIterator it(QStringLiteral(":/languages"), QDirIterator::NoIteratorFlags); + while (it.hasNext()) { + QString locale = it.next(); + locale.truncate(locale.lastIndexOf(QLatin1Char{'.'})); + locale.remove(0, locale.lastIndexOf(QLatin1Char{'/'}) + 1); + const QString lang = QLocale::languageToString(QLocale(locale).language()); + ui->language_combobox->addItem(lang, locale); + } + + // Unlike other configuration changes, interface language changes need to be reflected on the + // interface immediately. This is done by passing a signal to the main window, and then + // retranslating when passing back. + connect(ui->language_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this, + &ConfigureUi::OnLanguageChanged); +} + void ConfigureUi::InitializeIconSizeComboBox() { for (const auto& size : default_icon_sizes) { ui->icon_size_combobox->addItem(QString::fromUtf8(size.second), size.first); @@ -147,3 +171,10 @@ void ConfigureUi::UpdateSecondRowComboBox(bool init) { ui->row_2_text_combobox->removeItem( ui->row_2_text_combobox->findData(ui->row_1_text_combobox->currentData())); } + +void ConfigureUi::OnLanguageChanged(int index) { + if (index == -1) + return; + + emit LanguageChanged(ui->language_combobox->itemData(index).toString()); +} diff --git a/src/yuzu/configuration/configure_ui.h b/src/yuzu/configuration/configure_ui.h index d471afe99..c30bcf6ff 100644 --- a/src/yuzu/configuration/configure_ui.h +++ b/src/yuzu/configuration/configure_ui.h @@ -20,6 +20,12 @@ public: void ApplyConfiguration(); +private slots: + void OnLanguageChanged(int index); + +signals: + void LanguageChanged(const QString& locale); + private: void RequestGameListUpdate(); @@ -28,6 +34,7 @@ private: void changeEvent(QEvent*) override; void RetranslateUI(); + void InitializeLanguageComboBox(); void InitializeIconSizeComboBox(); void InitializeRowComboBoxes(); diff --git a/src/yuzu/configuration/configure_ui.ui b/src/yuzu/configuration/configure_ui.ui index bd5c5d3c2..0b81747d7 100644 --- a/src/yuzu/configuration/configure_ui.ui +++ b/src/yuzu/configuration/configure_ui.ui @@ -13,112 +13,132 @@ <property name="windowTitle"> <string>Form</string> </property> - <layout class="QHBoxLayout" name="HorizontalLayout"> + <layout class="QVBoxLayout" name="verticalLayout"> <item> - <layout class="QVBoxLayout" name="VerticalLayout"> - <item> - <widget class="QGroupBox" name="GeneralGroupBox"> - <property name="title"> - <string>General</string> - </property> - <layout class="QHBoxLayout" name="horizontalLayout"> + <widget class="QGroupBox" name="general_groupBox"> + <property name="title"> + <string>General</string> + </property> + <layout class="QHBoxLayout" name="horizontalLayout"> + <item> + <layout class="QVBoxLayout" name="verticalLayout_2"> <item> - <layout class="QVBoxLayout" name="verticalLayout"> + <widget class="QLabel" name="label_change_language_info"> + <property name="text"> + <string>Note: Changing language will apply your configuration.</string> + </property> + <property name="wordWrap"> + <bool>true</bool> + </property> + </widget> + </item> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_2"> + <item> + <widget class="QLabel" name="language_label"> + <property name="text"> + <string>Interface language:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="language_combobox"/> + </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_3"> + <item> + <widget class="QLabel" name="theme_label"> + <property name="text"> + <string>Theme:</string> + </property> + </widget> + </item> <item> - <layout class="QHBoxLayout" name="horizontalLayout_3"> - <item> - <widget class="QLabel" name="theme_label"> - <property name="text"> - <string>Theme:</string> - </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="theme_combobox"/> - </item> - </layout> + <widget class="QComboBox" name="theme_combobox"/> </item> </layout> </item> </layout> - </widget> - </item> - <item> - <widget class="QGroupBox" name="GameListGroupBox"> - <property name="title"> - <string>Game List</string> - </property> - <layout class="QHBoxLayout" name="GameListHorizontalLayout"> + </item> + </layout> + </widget> + </item> + <item> + <widget class="QGroupBox" name="GameListGroupBox"> + <property name="title"> + <string>Game List</string> + </property> + <layout class="QHBoxLayout" name="GameListHorizontalLayout"> + <item> + <layout class="QVBoxLayout" name="GeneralVerticalLayout"> <item> - <layout class="QVBoxLayout" name="GeneralVerticalLayout"> + <widget class="QCheckBox" name="show_add_ons"> + <property name="text"> + <string>Show Add-Ons Column</string> + </property> + </widget> + </item> + <item> + <layout class="QHBoxLayout" name="icon_size_qhbox_layout_2"> <item> - <widget class="QCheckBox" name="show_add_ons"> + <widget class="QLabel" name="icon_size_label"> <property name="text"> - <string>Show Add-Ons Column</string> + <string>Icon Size:</string> </property> </widget> </item> <item> - <layout class="QHBoxLayout" name="icon_size_qhbox_layout_2"> - <item> - <widget class="QLabel" name="icon_size_label"> - <property name="text"> - <string>Icon Size:</string> - </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="icon_size_combobox"/> - </item> - </layout> + <widget class="QComboBox" name="icon_size_combobox"/> </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="row_1_qhbox_layout"> <item> - <layout class="QHBoxLayout" name="row_1_qhbox_layout"> - <item> - <widget class="QLabel" name="row_1_label"> - <property name="text"> - <string>Row 1 Text:</string> - </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="row_1_text_combobox"/> - </item> - </layout> + <widget class="QLabel" name="row_1_label"> + <property name="text"> + <string>Row 1 Text:</string> + </property> + </widget> </item> <item> - <layout class="QHBoxLayout" name="row_2_qhbox_layout"> - <item> - <widget class="QLabel" name="row_2_label"> - <property name="text"> - <string>Row 2 Text:</string> - </property> - </widget> - </item> - <item> - <widget class="QComboBox" name="row_2_text_combobox"/> - </item> - </layout> + <widget class="QComboBox" name="row_1_text_combobox"/> + </item> + </layout> + </item> + <item> + <layout class="QHBoxLayout" name="row_2_qhbox_layout"> + <item> + <widget class="QLabel" name="row_2_label"> + <property name="text"> + <string>Row 2 Text:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="row_2_text_combobox"/> </item> </layout> </item> </layout> - </widget> - </item> - <item> - <spacer name="verticalSpacer"> - <property name="orientation"> - <enum>Qt::Vertical</enum> - </property> - <property name="sizeHint" stdset="0"> - <size> - <width>20</width> - <height>40</height> - </size> - </property> - </spacer> - </item> - </layout> + </item> + </layout> + </widget> + </item> + <item> + <spacer name="verticalSpacer"> + <property name="orientation"> + <enum>Qt::Vertical</enum> + </property> + <property name="sizeHint" stdset="0"> + <size> + <width>20</width> + <height>40</height> + </size> + </property> + </spacer> </item> </layout> </widget> diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 9bb0a0109..f391a41a9 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -2,9 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <fmt/format.h> #include "yuzu/debugger/wait_tree.h" +#include "yuzu/uisettings.h" #include "yuzu/util/util.h" #include "common/assert.h" @@ -19,11 +21,37 @@ #include "core/hle/kernel/thread.h" #include "core/memory.h" +namespace { + +constexpr std::array<std::array<Qt::GlobalColor, 2>, 10> WaitTreeColors{{ + {Qt::GlobalColor::darkGreen, Qt::GlobalColor::green}, + {Qt::GlobalColor::darkGreen, Qt::GlobalColor::green}, + {Qt::GlobalColor::darkBlue, Qt::GlobalColor::cyan}, + {Qt::GlobalColor::lightGray, Qt::GlobalColor::lightGray}, + {Qt::GlobalColor::lightGray, Qt::GlobalColor::lightGray}, + {Qt::GlobalColor::darkRed, Qt::GlobalColor::red}, + {Qt::GlobalColor::darkYellow, Qt::GlobalColor::yellow}, + {Qt::GlobalColor::red, Qt::GlobalColor::red}, + {Qt::GlobalColor::darkCyan, Qt::GlobalColor::cyan}, + {Qt::GlobalColor::gray, Qt::GlobalColor::gray}, +}}; + +bool IsDarkTheme() { + const auto& theme = UISettings::values.theme; + return theme == QStringLiteral("qdarkstyle") || theme == QStringLiteral("colorful_dark"); +} + +} // namespace + WaitTreeItem::WaitTreeItem() = default; WaitTreeItem::~WaitTreeItem() = default; QColor WaitTreeItem::GetColor() const { - return QColor(Qt::GlobalColor::black); + if (IsDarkTheme()) { + return QColor(Qt::GlobalColor::white); + } else { + return QColor(Qt::GlobalColor::black); + } } std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeItem::GetChildren() const { @@ -263,36 +291,38 @@ QString WaitTreeThread::GetText() const { } QColor WaitTreeThread::GetColor() const { + const std::size_t color_index = IsDarkTheme() ? 1 : 0; + const auto& thread = static_cast<const Kernel::Thread&>(object); switch (thread.GetStatus()) { case Kernel::ThreadStatus::Running: - return QColor(Qt::GlobalColor::darkGreen); + return QColor(WaitTreeColors[0][color_index]); case Kernel::ThreadStatus::Ready: if (!thread.IsPaused()) { if (thread.WasRunning()) { - return QColor(Qt::GlobalColor::darkGreen); + return QColor(WaitTreeColors[1][color_index]); } else { - return QColor(Qt::GlobalColor::darkBlue); + return QColor(WaitTreeColors[2][color_index]); } } else { - return QColor(Qt::GlobalColor::lightGray); + return QColor(WaitTreeColors[3][color_index]); } case Kernel::ThreadStatus::Paused: - return QColor(Qt::GlobalColor::lightGray); + return QColor(WaitTreeColors[4][color_index]); case Kernel::ThreadStatus::WaitHLEEvent: case Kernel::ThreadStatus::WaitIPC: - return QColor(Qt::GlobalColor::darkRed); + return QColor(WaitTreeColors[5][color_index]); case Kernel::ThreadStatus::WaitSleep: - return QColor(Qt::GlobalColor::darkYellow); + return QColor(WaitTreeColors[6][color_index]); case Kernel::ThreadStatus::WaitSynch: case Kernel::ThreadStatus::WaitMutex: case Kernel::ThreadStatus::WaitCondVar: case Kernel::ThreadStatus::WaitArb: - return QColor(Qt::GlobalColor::red); + return QColor(WaitTreeColors[7][color_index]); case Kernel::ThreadStatus::Dormant: - return QColor(Qt::GlobalColor::darkCyan); + return QColor(WaitTreeColors[8][color_index]); case Kernel::ThreadStatus::Dead: - return QColor(Qt::GlobalColor::gray); + return QColor(WaitTreeColors[9][color_index]); default: return WaitTreeItem::GetColor(); } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9f758605a..31a635176 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -94,6 +94,8 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" +#include "video_core/gpu.h" +#include "video_core/shader_notify.h" #include "yuzu/about_dialog.h" #include "yuzu/bootmanager.h" #include "yuzu/compatdb.h" @@ -189,6 +191,8 @@ GMainWindow::GMainWindow() provider(std::make_unique<FileSys::ManualContentProvider>()) { InitializeLogging(); + LoadTranslation(); + setAcceptDrops(true); ui.setupUi(this); statusBar()->hide(); @@ -498,6 +502,8 @@ void GMainWindow::InitializeWidgets() { message_label->setAlignment(Qt::AlignLeft); statusBar()->addPermanentWidget(message_label, 1); + shader_building_label = new QLabel(); + shader_building_label->setToolTip(tr("The amount of shaders currently being built")); emu_speed_label = new QLabel(); emu_speed_label->setToolTip( tr("Current emulation speed. Values higher or lower than 100% " @@ -510,7 +516,8 @@ void GMainWindow::InitializeWidgets() { tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " "full-speed emulation this should be at most 16.67 ms.")); - for (auto& label : {emu_speed_label, game_fps_label, emu_frametime_label}) { + for (auto& label : + {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); @@ -1176,6 +1183,7 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); + shader_building_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); @@ -2042,6 +2050,9 @@ void GMainWindow::OnConfigure() { const bool old_discord_presence = UISettings::values.enable_discord_presence; ConfigureDialog configure_dialog(this, hotkey_registry); + connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this, + &GMainWindow::OnLanguageChanged); + const auto result = configure_dialog.exec(); if (result != QDialog::Accepted) { return; @@ -2186,6 +2197,17 @@ void GMainWindow::UpdateStatusBar() { } auto results = Core::System::GetInstance().GetAndResetPerfStats(); + auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); + const auto shaders_building = shader_notify.GetShadersBuilding(); + + if (shaders_building != 0) { + shader_building_label->setText( + tr("Building: %1 shader").arg(shaders_building) + + (shaders_building != 1 ? QString::fromStdString("s") : QString::fromStdString(""))); + shader_building_label->setVisible(true); + } else { + shader_building_label->setVisible(false); + } if (Settings::values.use_frame_limit.GetValue()) { emu_speed_label->setText(tr("Speed: %1% / %2%") @@ -2315,9 +2337,12 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { if (behavior == ReinitializeKeyBehavior::Warning) { const auto res = QMessageBox::information( this, tr("Confirm Key Rederivation"), - tr("You are about to force rederive all of your keys. \nIf you do not know what this " - "means or what you are doing, \nthis is a potentially destructive action. \nPlease " - "make sure this is what you want \nand optionally make backups.\n\nThis will delete " + tr("You are about to force rederive all of your keys. \nIf you do not know what " + "this " + "means or what you are doing, \nthis is a potentially destructive action. " + "\nPlease " + "make sure this is what you want \nand optionally make backups.\n\nThis will " + "delete " "your autogenerated key files and re-run the key derivation module."), QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel}); @@ -2600,6 +2625,43 @@ void GMainWindow::UpdateUITheme() { QIcon::setThemeSearchPaths(theme_paths); } +void GMainWindow::LoadTranslation() { + // If the selected language is English, no need to install any translation + if (UISettings::values.language == QStringLiteral("en")) { + return; + } + + bool loaded; + + if (UISettings::values.language.isEmpty()) { + // If the selected language is empty, use system locale + loaded = translator.load(QLocale(), {}, {}, QStringLiteral(":/languages/")); + } else { + // Otherwise load from the specified file + loaded = translator.load(UISettings::values.language, QStringLiteral(":/languages/")); + } + + if (loaded) { + qApp->installTranslator(&translator); + } else { + UISettings::values.language = QStringLiteral("en"); + } +} + +void GMainWindow::OnLanguageChanged(const QString& locale) { + if (UISettings::values.language != QStringLiteral("en")) { + qApp->removeTranslator(&translator); + } + + UISettings::values.language = locale; + LoadTranslation(); + ui.retranslateUi(this); + UpdateWindowTitle(); + + if (emulation_running) + ui.action_Start->setText(tr("Continue")); +} + void GMainWindow::SetDiscordEnabled([[maybe_unused]] bool state) { #ifdef USE_DISCORD_PRESENCE if (state) { @@ -2628,8 +2690,8 @@ int main(int argc, char* argv[]) { #ifdef __APPLE__ // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/". - // But since we require the working directory to be the executable path for the location of the - // user folder in the Qt Frontend, we need to cd into that working directory + // But since we require the working directory to be the executable path for the location of + // the user folder in the Qt Frontend, we need to cd into that working directory const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; chdir(bin_path.c_str()); #endif diff --git a/src/yuzu/main.h b/src/yuzu/main.h index adff65fb5..db573d606 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -10,6 +10,7 @@ #include <QMainWindow> #include <QTimer> +#include <QTranslator> #include "common/common_types.h" #include "core/core.h" @@ -225,6 +226,7 @@ private slots: void OnCaptureScreenshot(); void OnCoreError(Core::System::ResultStatus, std::string); void OnReinitializeKeys(ReinitializeKeyBehavior behavior); + void OnLanguageChanged(const QString& locale); private: std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); @@ -237,6 +239,7 @@ private: void HideMouseCursor(); void ShowMouseCursor(); void OpenURL(const QUrl& url); + void LoadTranslation(); Ui::MainWindow ui; @@ -248,6 +251,7 @@ private: // Status bar elements QLabel* message_label = nullptr; + QLabel* shader_building_label = nullptr; QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; @@ -284,6 +288,8 @@ private: HotkeyRegistry hotkey_registry; + QTranslator translator; + // Install progress dialog QProgressDialog* install_progress; diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index 830932d45..6cc65736d 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -75,6 +75,7 @@ struct Values { bool game_dir_deprecated_deepscan; QVector<UISettings::GameDir> game_dirs; QStringList recent_files; + QString language; QString theme; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 7773228c8..c2a2982fb 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -394,6 +394,10 @@ void Config::ReadValues() { static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1))); Settings::values.use_assembly_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false)); + Settings::values.use_asynchronous_shaders.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); + Settings::values.use_asynchronous_shaders.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); Settings::values.use_fast_gpu_time.SetValue( sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 5bed47fd7..aa9e40380 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -166,6 +166,10 @@ use_vsync = # 0 (default): Off, 1: On use_assembly_shaders = +# Whether to allow asynchronous shader building. +# 0 (default): Off, 1: On +use_asynchronous_shaders = + # Turns on the frame limiter, which will limit frames output to the target game speed # 0: Off, 1: On (default) use_frame_limit = |