diff options
72 files changed, 1199 insertions, 871 deletions
| diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 61dddab3f..e40d117d6 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -2,13 +2,10 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -#include <algorithm>  #include <atomic>  #include <chrono>  #include <climits> -#include <condition_variable> -#include <memory> -#include <mutex> +#include <exception>  #include <thread>  #include <vector> @@ -16,104 +13,230 @@  #include <windows.h> // For OutputDebugStringW  #endif -#include "common/assert.h"  #include "common/fs/file.h"  #include "common/fs/fs.h" +#include "common/fs/fs_paths.h" +#include "common/fs/path_util.h"  #include "common/literals.h" +#include "common/thread.h"  #include "common/logging/backend.h"  #include "common/logging/log.h"  #include "common/logging/text_formatter.h"  #include "common/settings.h" +#ifdef _WIN32  #include "common/string_util.h" +#endif  #include "common/threadsafe_queue.h"  namespace Common::Log { +namespace { +  /** - * Static state as a singleton. + * Interface for logging backends.   */ -class Impl { +class Backend {  public: -    static Impl& Instance() { -        static Impl backend; -        return backend; +    virtual ~Backend() = default; + +    virtual void Write(const Entry& entry) = 0; + +    virtual void EnableForStacktrace() = 0; + +    virtual void Flush() = 0; +}; + +/** + * Backend that writes to stderr and with color + */ +class ColorConsoleBackend final : public Backend { +public: +    explicit ColorConsoleBackend() = default; + +    ~ColorConsoleBackend() override = default; + +    void Write(const Entry& entry) override { +        if (enabled.load(std::memory_order_relaxed)) { +            PrintColoredMessage(entry); +        }      } -    Impl(const Impl&) = delete; -    Impl& operator=(const Impl&) = delete; +    void Flush() override { +        // stderr shouldn't be buffered +    } -    Impl(Impl&&) = delete; -    Impl& operator=(Impl&&) = delete; +    void EnableForStacktrace() override { +        enabled = true; +    } -    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, -                   const char* function, std::string message) { -        message_queue.Push( -            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message))); +    void SetEnabled(bool enabled_) { +        enabled = enabled_; +    } + +private: +    std::atomic_bool enabled{false}; +}; + +/** + * Backend that writes to a file passed into the constructor + */ +class FileBackend final : public Backend { +public: +    explicit FileBackend(const std::filesystem::path& filename) { +        auto old_filename = filename; +        old_filename += ".old.txt"; + +        // Existence checks are done within the functions themselves. +        // We don't particularly care if these succeed or not. +        static_cast<void>(FS::RemoveFile(old_filename)); +        static_cast<void>(FS::RenameFile(filename, old_filename)); + +        file = std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write, +                                            FS::FileType::TextFile); +    } + +    ~FileBackend() override = default; + +    void Write(const Entry& entry) override { +        if (!enabled) { +            return; +        } + +        bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n')); + +        using namespace Common::Literals; +        // Prevent logs from exceeding a set maximum size in the event that log entries are spammed. +        const auto write_limit = Settings::values.extended_logging ? 1_GiB : 100_MiB; +        const bool write_limit_exceeded = bytes_written > write_limit; +        if (entry.log_level >= Level::Error || write_limit_exceeded) { +            if (write_limit_exceeded) { +                // Stop writing after the write limit is exceeded. +                // Don't close the file so we can print a stacktrace if necessary +                enabled = false; +            } +            file->Flush(); +        } +    } + +    void Flush() override { +        file->Flush(); +    } + +    void EnableForStacktrace() override { +        enabled = true; +        bytes_written = 0;      } -    void AddBackend(std::unique_ptr<Backend> backend) { -        std::lock_guard lock{writing_mutex}; -        backends.push_back(std::move(backend)); +private: +    std::unique_ptr<FS::IOFile> file; +    bool enabled = true; +    std::size_t bytes_written = 0; +}; + +/** + * Backend that writes to Visual Studio's output window + */ +class DebuggerBackend final : public Backend { +public: +    explicit DebuggerBackend() = default; + +    ~DebuggerBackend() override = default; + +    void Write(const Entry& entry) override { +#ifdef _WIN32 +        ::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str()); +#endif      } -    void RemoveBackend(std::string_view backend_name) { -        std::lock_guard lock{writing_mutex}; +    void Flush() override {} + +    void EnableForStacktrace() override {} +}; + +bool initialization_in_progress_suppress_logging = true; -        std::erase_if(backends, [&backend_name](const auto& backend) { -            return backend_name == backend->GetName(); -        }); +/** + * Static state as a singleton. + */ +class Impl { +public: +    static Impl& Instance() { +        if (!instance) { +            throw std::runtime_error("Using Logging instance before its initialization"); +        } +        return *instance;      } -    const Filter& GetGlobalFilter() const { -        return filter; +    static void Initialize() { +        if (instance) { +            LOG_WARNING(Log, "Reinitializing logging backend"); +            return; +        } +        using namespace Common::FS; +        const auto& log_dir = GetYuzuPath(YuzuPath::LogDir); +        void(CreateDir(log_dir)); +        Filter filter; +        filter.ParseFilterString(Settings::values.log_filter.GetValue()); +        instance = std::unique_ptr<Impl, decltype(&Deleter)>(new Impl(log_dir / LOG_FILE, filter), +                                                             Deleter); +        initialization_in_progress_suppress_logging = false;      } +    Impl(const Impl&) = delete; +    Impl& operator=(const Impl&) = delete; + +    Impl(Impl&&) = delete; +    Impl& operator=(Impl&&) = delete; +      void SetGlobalFilter(const Filter& f) {          filter = f;      } -    Backend* GetBackend(std::string_view backend_name) { -        const auto it = -            std::find_if(backends.begin(), backends.end(), -                         [&backend_name](const auto& i) { return backend_name == i->GetName(); }); -        if (it == backends.end()) -            return nullptr; -        return it->get(); +    void SetColorConsoleBackendEnabled(bool enabled) { +        color_console_backend.SetEnabled(enabled); +    } + +    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, +                   const char* function, std::string message) { +        if (!filter.CheckMessage(log_class, log_level)) +            return; +        const Entry& entry = +            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)); +        message_queue.Push(entry);      }  private: -    Impl() { -        backend_thread = std::thread([&] { -            Entry entry; -            auto write_logs = [&](Entry& e) { -                std::lock_guard lock{writing_mutex}; -                for (const auto& backend : backends) { -                    backend->Write(e); -                } -            }; -            while (true) { -                entry = message_queue.PopWait(); -                if (entry.final_entry) { -                    break; -                } -                write_logs(entry); -            } +    Impl(const std::filesystem::path& file_backend_filename, const Filter& filter_) +        : filter{filter_}, file_backend{file_backend_filename}, backend_thread{std::thread([this] { +              Common::SetCurrentThreadName("yuzu:Log"); +              Entry entry; +              const auto write_logs = [this, &entry]() { +                  ForEachBackend([&entry](Backend& backend) { backend.Write(entry); }); +              }; +              while (true) { +                  entry = message_queue.PopWait(); +                  if (entry.final_entry) { +                      break; +                  } +                  write_logs(); +              } +              // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a +              // case where a system is repeatedly spamming logs even on close. +              int max_logs_to_write = filter.IsDebug() ? INT_MAX : 100; +              while (max_logs_to_write-- && message_queue.Pop(entry)) { +                  write_logs(); +              } +          })} {} -            // Drain the logging queue. Only writes out up to MAX_LOGS_TO_WRITE to prevent a -            // case where a system is repeatedly spamming logs even on close. -            const int MAX_LOGS_TO_WRITE = filter.IsDebug() ? INT_MAX : 100; -            int logs_written = 0; -            while (logs_written++ < MAX_LOGS_TO_WRITE && message_queue.Pop(entry)) { -                write_logs(entry); -            } -        }); +    ~Impl() { +        StopBackendThread();      } -    ~Impl() { -        Entry entry; -        entry.final_entry = true; -        message_queue.Push(entry); +    void StopBackendThread() { +        Entry stop_entry{}; +        stop_entry.final_entry = true; +        message_queue.Push(stop_entry);          backend_thread.join();      } @@ -135,100 +258,51 @@ private:          };      } -    std::mutex writing_mutex; -    std::thread backend_thread; -    std::vector<std::unique_ptr<Backend>> backends; -    MPSCQueue<Entry> message_queue; -    Filter filter; -    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; -}; - -ConsoleBackend::~ConsoleBackend() = default; - -void ConsoleBackend::Write(const Entry& entry) { -    PrintMessage(entry); -} - -ColorConsoleBackend::~ColorConsoleBackend() = default; - -void ColorConsoleBackend::Write(const Entry& entry) { -    PrintColoredMessage(entry); -} - -FileBackend::FileBackend(const std::filesystem::path& filename) { -    auto old_filename = filename; -    old_filename += ".old.txt"; - -    // Existence checks are done within the functions themselves. -    // We don't particularly care if these succeed or not. -    FS::RemoveFile(old_filename); -    void(FS::RenameFile(filename, old_filename)); - -    file = -        std::make_unique<FS::IOFile>(filename, FS::FileAccessMode::Write, FS::FileType::TextFile); -} - -FileBackend::~FileBackend() = default; +    void ForEachBackend(auto lambda) { +        lambda(static_cast<Backend&>(debugger_backend)); +        lambda(static_cast<Backend&>(color_console_backend)); +        lambda(static_cast<Backend&>(file_backend)); +    } -void FileBackend::Write(const Entry& entry) { -    if (!file->IsOpen()) { -        return; +    static void Deleter(Impl* ptr) { +        delete ptr;      } -    using namespace Common::Literals; -    // Prevent logs from exceeding a set maximum size in the event that log entries are spammed. -    constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB; -    constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB; +    static inline std::unique_ptr<Impl, decltype(&Deleter)> instance{nullptr, Deleter}; -    const bool write_limit_exceeded = -        bytes_written > MAX_BYTES_WRITTEN_EXTENDED || -        (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging); +    Filter filter; +    DebuggerBackend debugger_backend{}; +    ColorConsoleBackend color_console_backend{}; +    FileBackend file_backend; -    // Close the file after the write limit is exceeded. -    if (write_limit_exceeded) { -        file->Close(); -        return; -    } +    std::thread backend_thread; +    MPSCQueue<Entry> message_queue{}; +    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; +}; +} // namespace -    bytes_written += file->WriteString(FormatLogMessage(entry).append(1, '\n')); -    if (entry.log_level >= Level::Error) { -        file->Flush(); -    } +void Initialize() { +    Impl::Initialize();  } -DebuggerBackend::~DebuggerBackend() = default; - -void DebuggerBackend::Write(const Entry& entry) { -#ifdef _WIN32 -    ::OutputDebugStringW(UTF8ToUTF16W(FormatLogMessage(entry).append(1, '\n')).c_str()); -#endif +void DisableLoggingInTests() { +    initialization_in_progress_suppress_logging = true;  }  void SetGlobalFilter(const Filter& filter) {      Impl::Instance().SetGlobalFilter(filter);  } -void AddBackend(std::unique_ptr<Backend> backend) { -    Impl::Instance().AddBackend(std::move(backend)); -} - -void RemoveBackend(std::string_view backend_name) { -    Impl::Instance().RemoveBackend(backend_name); -} - -Backend* GetBackend(std::string_view backend_name) { -    return Impl::Instance().GetBackend(backend_name); +void SetColorConsoleBackendEnabled(bool enabled) { +    Impl::Instance().SetColorConsoleBackendEnabled(enabled);  }  void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,                         unsigned int line_num, const char* function, const char* format,                         const fmt::format_args& args) { -    auto& instance = Impl::Instance(); -    const auto& filter = instance.GetGlobalFilter(); -    if (!filter.CheckMessage(log_class, log_level)) -        return; - -    instance.PushEntry(log_class, log_level, filename, line_num, function, -                       fmt::vformat(format, args)); +    if (!initialization_in_progress_suppress_logging) { +        Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function, +                                   fmt::vformat(format, args)); +    }  }  } // namespace Common::Log diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index 4b9a910c1..cb7839ee9 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -5,120 +5,21 @@  #pragma once  #include <filesystem> -#include <memory> -#include <string> -#include <string_view>  #include "common/logging/filter.h" -#include "common/logging/log.h" - -namespace Common::FS { -class IOFile; -}  namespace Common::Log {  class Filter; -/** - * Interface for logging backends. As loggers can be created and removed at runtime, this can be - * used by a frontend for adding a custom logging backend as needed - */ -class Backend { -public: -    virtual ~Backend() = default; - -    virtual void SetFilter(const Filter& new_filter) { -        filter = new_filter; -    } -    virtual const char* GetName() const = 0; -    virtual void Write(const Entry& entry) = 0; - -private: -    Filter filter; -}; - -/** - * Backend that writes to stderr without any color commands - */ -class ConsoleBackend : public Backend { -public: -    ~ConsoleBackend() override; - -    static const char* Name() { -        return "console"; -    } -    const char* GetName() const override { -        return Name(); -    } -    void Write(const Entry& entry) override; -}; - -/** - * Backend that writes to stderr and with color - */ -class ColorConsoleBackend : public Backend { -public: -    ~ColorConsoleBackend() override; - -    static const char* Name() { -        return "color_console"; -    } - -    const char* GetName() const override { -        return Name(); -    } -    void Write(const Entry& entry) override; -}; +/// Initializes the logging system. This should be the first thing called in main. +void Initialize(); -/** - * Backend that writes to a file passed into the constructor - */ -class FileBackend : public Backend { -public: -    explicit FileBackend(const std::filesystem::path& filename); -    ~FileBackend() override; - -    static const char* Name() { -        return "file"; -    } - -    const char* GetName() const override { -        return Name(); -    } - -    void Write(const Entry& entry) override; - -private: -    std::unique_ptr<FS::IOFile> file; -    std::size_t bytes_written = 0; -}; - -/** - * Backend that writes to Visual Studio's output window - */ -class DebuggerBackend : public Backend { -public: -    ~DebuggerBackend() override; - -    static const char* Name() { -        return "debugger"; -    } -    const char* GetName() const override { -        return Name(); -    } -    void Write(const Entry& entry) override; -}; - -void AddBackend(std::unique_ptr<Backend> backend); - -void RemoveBackend(std::string_view backend_name); - -Backend* GetBackend(std::string_view backend_name); +void DisableLoggingInTests();  /** - * The global filter will prevent any messages from even being processed if they are filtered. Each - * backend can have a filter, but if the level is lower than the global filter, the backend will - * never get the message + * The global filter will prevent any messages from even being processed if they are filtered.   */  void SetGlobalFilter(const Filter& filter); -} // namespace Common::Log
\ No newline at end of file + +void SetColorConsoleBackendEnabled(bool enabled); +} // namespace Common::Log diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index f055f0e11..42744c994 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -111,6 +111,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {      SUB(Service, NCM)                                                                              \      SUB(Service, NFC)                                                                              \      SUB(Service, NFP)                                                                              \ +    SUB(Service, NGCT)                                                                             \      SUB(Service, NIFM)                                                                             \      SUB(Service, NIM)                                                                              \      SUB(Service, NPNS)                                                                             \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index 7ad0334fc..ddf9d27ca 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -81,6 +81,7 @@ enum class Class : u8 {      Service_NCM,       ///< The NCM service      Service_NFC,       ///< The NFC (Near-field communication) service      Service_NFP,       ///< The NFP service +    Service_NGCT,      ///< The NGCT (No Good Content for Terra) service      Service_NIFM,      ///< The NIFM (Network interface) service      Service_NIM,       ///< The NIM service      Service_NPNS,      ///< The NPNS service diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..365488ba5 --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,140 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <deque> +#include <memory> +#include <type_traits> + +#include "common/common_types.h" + +namespace Common { + +template <class Traits> +class LeastRecentlyUsedCache { +    using ObjectType = typename Traits::ObjectType; +    using TickType = typename Traits::TickType; + +    struct Item { +        ObjectType obj; +        TickType tick; +        Item* next{}; +        Item* prev{}; +    }; + +public: +    LeastRecentlyUsedCache() : first_item{}, last_item{} {} +    ~LeastRecentlyUsedCache() = default; + +    size_t Insert(ObjectType obj, TickType tick) { +        const auto new_id = Build(); +        auto& item = item_pool[new_id]; +        item.obj = obj; +        item.tick = tick; +        Attach(item); +        return new_id; +    } + +    void Touch(size_t id, TickType tick) { +        auto& item = item_pool[id]; +        if (item.tick >= tick) { +            return; +        } +        item.tick = tick; +        if (&item == last_item) { +            return; +        } +        Detach(item); +        Attach(item); +    } + +    void Free(size_t id) { +        auto& item = item_pool[id]; +        Detach(item); +        item.prev = nullptr; +        item.next = nullptr; +        free_items.push_back(id); +    } + +    template <typename Func> +    void ForEachItemBelow(TickType tick, Func&& func) { +        static constexpr bool RETURNS_BOOL = +            std::is_same_v<std::invoke_result<Func, ObjectType>, bool>; +        Item* iterator = first_item; +        while (iterator) { +            if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) { +                return; +            } +            Item* next = iterator->next; +            if constexpr (RETURNS_BOOL) { +                if (func(iterator->obj)) { +                    return; +                } +            } else { +                func(iterator->obj); +            } +            iterator = next; +        } +    } + +private: +    size_t Build() { +        if (free_items.empty()) { +            const size_t item_id = item_pool.size(); +            auto& item = item_pool.emplace_back(); +            item.next = nullptr; +            item.prev = nullptr; +            return item_id; +        } +        const size_t item_id = free_items.front(); +        free_items.pop_front(); +        auto& item = item_pool[item_id]; +        item.next = nullptr; +        item.prev = nullptr; +        return item_id; +    } + +    void Attach(Item& item) { +        if (!first_item) { +            first_item = &item; +        } +        if (!last_item) { +            last_item = &item; +        } else { +            item.prev = last_item; +            last_item->next = &item; +            item.next = nullptr; +            last_item = &item; +        } +    } + +    void Detach(Item& item) { +        if (item.prev) { +            item.prev->next = item.next; +        } +        if (item.next) { +            item.next->prev = item.prev; +        } +        if (&item == first_item) { +            first_item = item.next; +            if (first_item) { +                first_item->prev = nullptr; +            } +        } +        if (&item == last_item) { +            last_item = item.prev; +            if (last_item) { +                last_item->next = nullptr; +            } +        } +    } + +    std::deque<Item> item_pool; +    std::deque<size_t> free_items; +    Item* first_item{}; +    Item* last_item{}; +}; + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 996315999..fd3b639cd 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,7 +59,6 @@ void LogSettings() {      log_setting("Renderer_UseVsync", values.use_vsync.GetValue());      log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());      log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); -    log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());      log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());      log_setting("Audio_OutputEngine", values.sink_id.GetValue());      log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) {      values.shader_backend.SetGlobal(true);      values.use_asynchronous_shaders.SetGlobal(true);      values.use_fast_gpu_time.SetGlobal(true); -    values.use_caches_gc.SetGlobal(true);      values.bg_red.SetGlobal(true);      values.bg_green.SetGlobal(true);      values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index c65746749..ec4d381e8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -475,7 +475,6 @@ struct Values {                                                  ShaderBackend::SPIRV, "shader_backend"};      Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};      Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; -    Setting<bool> use_caches_gc{false, "use_caches_gc"};      Setting<u8> bg_red{0, "bg_red"};      Setting<u8> bg_green{0, "bg_green"}; @@ -489,7 +488,7 @@ struct Values {      std::chrono::seconds custom_rtc_differential;      BasicSetting<s32> current_user{0, "current_user"}; -    RangedSetting<s32> language_index{1, 0, 16, "language_index"}; +    RangedSetting<s32> language_index{1, 0, 17, "language_index"};      RangedSetting<s32> region_index{1, 0, 6, "region_index"};      RangedSetting<s32> time_zone_index{0, 0, 45, "time_zone_index"};      RangedSetting<s32> sound_index{1, 0, 2, "sound_index"}; diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f5cf5c16a..87d47e2e5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -452,6 +452,8 @@ add_library(core STATIC      hle/service/nfp/nfp.h      hle/service/nfp/nfp_user.cpp      hle/service/nfp/nfp_user.h +    hle/service/ngct/ngct.cpp +    hle/service/ngct/ngct.h      hle/service/nifm/nifm.cpp      hle/service/nifm/nifm.h      hle/service/nim/nim.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 5d8a61b3a..ba4629993 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -4,6 +4,7 @@  #include <array>  #include <atomic> +#include <exception>  #include <memory>  #include <utility> @@ -84,8 +85,6 @@ FileSys::StorageId GetStorageIdForFrontendSlot(  } // Anonymous namespace -/*static*/ System System::s_instance; -  FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,                                           const std::string& path) {      // To account for split 00+01+etc files. @@ -425,6 +424,20 @@ struct System::Impl {  System::System() : impl{std::make_unique<Impl>(*this)} {}  System::~System() = default; +System& System::GetInstance() { +    if (!s_instance) { +        throw std::runtime_error("Using System instance before its initialization"); +    } +    return *s_instance; +} + +void System::InitializeGlobalInstance() { +    if (s_instance) { +        throw std::runtime_error("Reinitializing Global System instance."); +    } +    s_instance = std::unique_ptr<System>(new System); +} +  CpuManager& System::GetCpuManager() {      return impl->cpu_manager;  } @@ -494,6 +507,12 @@ const ARM_Interface& System::CurrentArmInterface() const {      return impl->kernel.CurrentPhysicalCore().ArmInterface();  } +std::size_t System::CurrentCoreIndex() const { +    std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    ASSERT(core < Core::Hardware::NUM_CPU_CORES); +    return core; +} +  Kernel::PhysicalCore& System::CurrentPhysicalCore() {      return impl->kernel.CurrentPhysicalCore();  } diff --git a/src/core/core.h b/src/core/core.h index cd9af0c07..715ab88e7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -120,9 +120,9 @@ public:       * Gets the instance of the System singleton class.       * @returns Reference to the instance of the System singleton class.       */ -    [[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance() { -        return s_instance; -    } +    [[deprecated("Use of the global system instance is deprecated")]] static System& GetInstance(); + +    static void InitializeGlobalInstance();      /// Enumeration representing the return values of the System Initialize and Load process.      enum class ResultStatus : u32 { @@ -205,6 +205,9 @@ public:      /// Gets an ARM interface to the CPU core that is currently running      [[nodiscard]] const ARM_Interface& CurrentArmInterface() const; +    /// Gets the index of the currently running CPU core +    [[nodiscard]] std::size_t CurrentCoreIndex() const; +      /// Gets the physical core for the CPU core that is currently running      [[nodiscard]] Kernel::PhysicalCore& CurrentPhysicalCore(); @@ -393,7 +396,7 @@ private:      struct Impl;      std::unique_ptr<Impl> impl; -    static System s_instance; +    inline static std::unique_ptr<System> s_instance{};  };  } // namespace Core diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index de2e5563e..77efcabf0 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -118,18 +118,17 @@ void CpuManager::MultiCoreRunGuestLoop() {              physical_core = &kernel.CurrentPhysicalCore();          }          system.ExitDynarmicProfile(); -        { -            Kernel::KScopedDisableDispatch dd(kernel); -            physical_core->ArmInterface().ClearExclusiveState(); -        } +        physical_core->ArmInterface().ClearExclusiveState(); +        kernel.CurrentScheduler()->RescheduleCurrentCore();      }  }  void CpuManager::MultiCoreRunIdleThread() {      auto& kernel = system.Kernel();      while (true) { -        Kernel::KScopedDisableDispatch dd(kernel); -        kernel.CurrentPhysicalCore().Idle(); +        auto& physical_core = kernel.CurrentPhysicalCore(); +        physical_core.Idle(); +        kernel.CurrentScheduler()->RescheduleCurrentCore();      }  } @@ -137,12 +136,12 @@ void CpuManager::MultiCoreRunSuspendThread() {      auto& kernel = system.Kernel();      kernel.CurrentScheduler()->OnThreadStart();      while (true) { -        auto core = kernel.CurrentPhysicalCoreIndex(); +        auto core = kernel.GetCurrentHostThreadID();          auto& scheduler = *kernel.CurrentScheduler();          Kernel::KThread* current_thread = scheduler.GetCurrentThread();          Common::Fiber::YieldTo(current_thread->GetHostContext(), *core_data[core].host_context);          ASSERT(scheduler.ContextSwitchPending()); -        ASSERT(core == kernel.CurrentPhysicalCoreIndex()); +        ASSERT(core == kernel.GetCurrentHostThreadID());          scheduler.RescheduleCurrentCore();      }  } @@ -348,11 +347,15 @@ void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {              sc_sync_first_use = false;          } -        // Emulation was stopped -        if (stop_token.stop_requested()) { +        // Abort if emulation was killed before the session really starts +        if (!system.IsPoweredOn()) {              return;          } +        if (stop_token.stop_requested()) { +            break; +        } +          auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();          data.is_running = true;          Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp index 6771ef621..1b429bc1e 100644 --- a/src/core/hle/kernel/k_address_arbiter.cpp +++ b/src/core/hle/kernel/k_address_arbiter.cpp @@ -28,7 +28,7 @@ bool ReadFromUser(Core::System& system, s32* out, VAddr address) {  bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 value) {      auto& monitor = system.Monitor(); -    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); +    const auto current_core = system.CurrentCoreIndex();      // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.      // TODO(bunnei): We should call CanAccessAtomic(..) here. @@ -58,7 +58,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu  bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32 new_value) {      auto& monitor = system.Monitor(); -    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); +    const auto current_core = system.CurrentCoreIndex();      // TODO(bunnei): We should disable interrupts here via KScopedInterruptDisable.      // TODO(bunnei): We should call CanAccessAtomic(..) here. diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index 165b76747..e4fcdbc67 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h @@ -170,10 +170,6 @@ public:          }      } -    const std::string& GetName() const { -        return name; -    } -  private:      void RegisterWithKernel();      void UnregisterWithKernel(); diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index 4174f35fd..ef14ad1d2 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -35,7 +35,7 @@ bool WriteToUser(Core::System& system, VAddr address, const u32* p) {  bool UpdateLockAtomic(Core::System& system, u32* out, VAddr address, u32 if_zero,                        u32 new_orr_mask) {      auto& monitor = system.Monitor(); -    const auto current_core = system.Kernel().CurrentPhysicalCoreIndex(); +    const auto current_core = system.CurrentCoreIndex();      // Load the value from the address.      const auto expected = monitor.ExclusiveRead32(current_core, address); diff --git a/src/core/hle/kernel/k_handle_table.cpp b/src/core/hle/kernel/k_handle_table.cpp index d720c2dda..6a420d5b0 100644 --- a/src/core/hle/kernel/k_handle_table.cpp +++ b/src/core/hle/kernel/k_handle_table.cpp @@ -13,7 +13,6 @@ ResultCode KHandleTable::Finalize() {      // Get the table and clear our record of it.      u16 saved_table_size = 0;      { -        KScopedDisableDispatch dd(kernel);          KScopedSpinLock lk(m_lock);          std::swap(m_table_size, saved_table_size); @@ -44,7 +43,6 @@ bool KHandleTable::Remove(Handle handle) {      // Find the object and free the entry.      KAutoObject* obj = nullptr;      { -        KScopedDisableDispatch dd(kernel);          KScopedSpinLock lk(m_lock);          if (this->IsValidHandle(handle)) { @@ -63,7 +61,6 @@ bool KHandleTable::Remove(Handle handle) {  }  ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) { -    KScopedDisableDispatch dd(kernel);      KScopedSpinLock lk(m_lock);      // Never exceed our capacity. @@ -86,7 +83,6 @@ ResultCode KHandleTable::Add(Handle* out_handle, KAutoObject* obj, u16 type) {  }  ResultCode KHandleTable::Reserve(Handle* out_handle) { -    KScopedDisableDispatch dd(kernel);      KScopedSpinLock lk(m_lock);      // Never exceed our capacity. @@ -97,7 +93,6 @@ ResultCode KHandleTable::Reserve(Handle* out_handle) {  }  void KHandleTable::Unreserve(Handle handle) { -    KScopedDisableDispatch dd(kernel);      KScopedSpinLock lk(m_lock);      // Unpack the handle. @@ -116,7 +111,6 @@ void KHandleTable::Unreserve(Handle handle) {  }  void KHandleTable::Register(Handle handle, KAutoObject* obj, u16 type) { -    KScopedDisableDispatch dd(kernel);      KScopedSpinLock lk(m_lock);      // Unpack the handle. diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 75dcec7df..2ff6aa160 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -69,7 +69,6 @@ public:      template <typename T = KAutoObject>      KScopedAutoObject<T> GetObjectWithoutPseudoHandle(Handle handle) const {          // Lock and look up in table. -        KScopedDisableDispatch dd(kernel);          KScopedSpinLock lk(m_lock);          if constexpr (std::is_same_v<T, KAutoObject>) { @@ -124,7 +123,6 @@ public:          size_t num_opened;          {              // Lock the table. -            KScopedDisableDispatch dd(kernel);              KScopedSpinLock lk(m_lock);              for (num_opened = 0; num_opened < num_handles; num_opened++) {                  // Get the current handle. diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 3d7e6707e..8ead1a769 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -59,7 +59,6 @@ void SetupMainThread(Core::System& system, KProcess& owner_process, u32 priority      thread->GetContext64().cpu_registers[0] = 0;      thread->GetContext32().cpu_registers[1] = thread_handle;      thread->GetContext64().cpu_registers[1] = thread_handle; -    thread->DisableDispatch();      auto& kernel = system.Kernel();      // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index 6ddbae52c..6a7d80d03 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -376,18 +376,20 @@ void KScheduler::ClearSchedulerUpdateNeeded(KernelCore& kernel) {  }  void KScheduler::DisableScheduling(KernelCore& kernel) { -    ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 0); -    GetCurrentThreadPointer(kernel)->DisableDispatch(); +    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { +        ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 0); +        scheduler->GetCurrentThread()->DisableDispatch(); +    }  }  void KScheduler::EnableScheduling(KernelCore& kernel, u64 cores_needing_scheduling) { -    ASSERT(GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() >= 1); - -    if (GetCurrentThreadPointer(kernel)->GetDisableDispatchCount() > 1) { -        GetCurrentThreadPointer(kernel)->EnableDispatch(); -    } else { -        RescheduleCores(kernel, cores_needing_scheduling); +    if (auto* scheduler = kernel.CurrentScheduler(); scheduler) { +        ASSERT(scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1); +        if (scheduler->GetCurrentThread()->GetDisableDispatchCount() >= 1) { +            scheduler->GetCurrentThread()->EnableDispatch(); +        }      } +    RescheduleCores(kernel, cores_needing_scheduling);  }  u64 KScheduler::UpdateHighestPriorityThreads(KernelCore& kernel) { @@ -615,17 +617,13 @@ KScheduler::KScheduler(Core::System& system_, s32 core_id_) : system{system_}, c      state.highest_priority_thread = nullptr;  } -void KScheduler::Finalize() { +KScheduler::~KScheduler() {      if (idle_thread) {          idle_thread->Close();          idle_thread = nullptr;      }  } -KScheduler::~KScheduler() { -    ASSERT(!idle_thread); -} -  KThread* KScheduler::GetCurrentThread() const {      if (auto result = current_thread.load(); result) {          return result; @@ -644,12 +642,10 @@ void KScheduler::RescheduleCurrentCore() {      if (phys_core.IsInterrupted()) {          phys_core.ClearInterrupt();      } -      guard.Lock();      if (state.needs_scheduling.load()) {          Schedule();      } else { -        GetCurrentThread()->EnableDispatch();          guard.Unlock();      }  } @@ -659,33 +655,26 @@ void KScheduler::OnThreadStart() {  }  void KScheduler::Unload(KThread* thread) { -    ASSERT(thread); -      LOG_TRACE(Kernel, "core {}, unload thread {}", core_id, thread ? thread->GetName() : "nullptr"); -    if (thread->IsCallingSvc()) { -        thread->ClearIsCallingSvc(); -    } - -    auto& physical_core = system.Kernel().PhysicalCore(core_id); -    if (!physical_core.IsInitialized()) { -        return; -    } - -    Core::ARM_Interface& cpu_core = physical_core.ArmInterface(); -    cpu_core.SaveContext(thread->GetContext32()); -    cpu_core.SaveContext(thread->GetContext64()); -    // Save the TPIDR_EL0 system register in case it was modified. -    thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); -    cpu_core.ClearExclusiveState(); - -    if (!thread->IsTerminationRequested() && thread->GetActiveCore() == core_id) { -        prev_thread = thread; -    } else { -        prev_thread = nullptr; +    if (thread) { +        if (thread->IsCallingSvc()) { +            thread->ClearIsCallingSvc(); +        } +        if (!thread->IsTerminationRequested()) { +            prev_thread = thread; + +            Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); +            cpu_core.SaveContext(thread->GetContext32()); +            cpu_core.SaveContext(thread->GetContext64()); +            // Save the TPIDR_EL0 system register in case it was modified. +            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); +            cpu_core.ClearExclusiveState(); +        } else { +            prev_thread = nullptr; +        } +        thread->context_guard.Unlock();      } - -    thread->context_guard.Unlock();  }  void KScheduler::Reload(KThread* thread) { @@ -694,6 +683,11 @@ void KScheduler::Reload(KThread* thread) {      if (thread) {          ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); +        auto* const thread_owner_process = thread->GetOwnerProcess(); +        if (thread_owner_process != nullptr) { +            system.Kernel().MakeCurrentProcess(thread_owner_process); +        } +          Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);          cpu_core.LoadContext(thread->GetContext32());          cpu_core.LoadContext(thread->GetContext64()); @@ -711,7 +705,7 @@ void KScheduler::SwitchContextStep2() {  }  void KScheduler::ScheduleImpl() { -    KThread* previous_thread = GetCurrentThread(); +    KThread* previous_thread = current_thread.load();      KThread* next_thread = state.highest_priority_thread;      state.needs_scheduling = false; @@ -723,15 +717,10 @@ void KScheduler::ScheduleImpl() {      // If we're not actually switching thread, there's nothing to do.      if (next_thread == current_thread.load()) { -        previous_thread->EnableDispatch();          guard.Unlock();          return;      } -    if (next_thread->GetCurrentCore() != core_id) { -        next_thread->SetCurrentCore(core_id); -    } -      current_thread.store(next_thread);      KProcess* const previous_process = system.Kernel().CurrentProcess(); @@ -742,7 +731,11 @@ void KScheduler::ScheduleImpl() {      Unload(previous_thread);      std::shared_ptr<Common::Fiber>* old_context; -    old_context = &previous_thread->GetHostContext(); +    if (previous_thread != nullptr) { +        old_context = &previous_thread->GetHostContext(); +    } else { +        old_context = &idle_thread->GetHostContext(); +    }      guard.Unlock();      Common::Fiber::YieldTo(*old_context, *switch_fiber); diff --git a/src/core/hle/kernel/k_scheduler.h b/src/core/hle/kernel/k_scheduler.h index 516e0cdba..12cfae919 100644 --- a/src/core/hle/kernel/k_scheduler.h +++ b/src/core/hle/kernel/k_scheduler.h @@ -33,8 +33,6 @@ public:      explicit KScheduler(Core::System& system_, s32 core_id_);      ~KScheduler(); -    void Finalize(); -      /// Reschedules to the next available thread (call after current thread is suspended)      void RescheduleCurrentCore(); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 0f6808ade..9f1d3156b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -14,7 +14,6 @@  #include "common/fiber.h"  #include "common/logging/log.h"  #include "common/scope_exit.h" -#include "common/settings.h"  #include "common/thread_queue_list.h"  #include "core/core.h"  #include "core/cpu_manager.h" @@ -189,7 +188,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s      // Setup the stack parameters.      StackParameters& sp = GetStackParameters();      sp.cur_thread = this; -    sp.disable_count = 0; +    sp.disable_count = 1;      SetInExceptionHandler();      // Set thread ID. @@ -216,10 +215,9 @@ ResultCode KThread::InitializeThread(KThread* thread, KThreadFunction func, uint      // Initialize the thread.      R_TRY(thread->Initialize(func, arg, user_stack_top, prio, core, owner, type)); -    // Initialize emulation parameters. +    // Initialize host context.      thread->host_context =          std::make_shared<Common::Fiber>(std::move(init_func), init_func_parameter); -    thread->is_single_core = !Settings::values.use_multi_core.GetValue();      return ResultSuccess;  } @@ -972,9 +970,6 @@ ResultCode KThread::Run() {          // Set our state and finish.          SetState(ThreadState::Runnable); - -        DisableDispatch(); -          return ResultSuccess;      }  } @@ -1059,16 +1054,4 @@ s32 GetCurrentCoreId(KernelCore& kernel) {      return GetCurrentThread(kernel).GetCurrentCore();  } -KScopedDisableDispatch::~KScopedDisableDispatch() { -    if (GetCurrentThread(kernel).GetDisableDispatchCount() <= 1) { -        auto scheduler = kernel.CurrentScheduler(); - -        if (scheduler) { -            scheduler->RescheduleCurrentCore(); -        } -    } else { -        GetCurrentThread(kernel).EnableDispatch(); -    } -} -  } // namespace Kernel diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index e4c4c877d..c77f44ad4 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -450,39 +450,16 @@ public:          sleeping_queue = q;      } -    [[nodiscard]] bool IsKernelThread() const { -        return GetActiveCore() == 3; -    } - -    [[nodiscard]] bool IsDispatchTrackingDisabled() const { -        return is_single_core || IsKernelThread(); -    } -      [[nodiscard]] s32 GetDisableDispatchCount() const { -        if (IsDispatchTrackingDisabled()) { -            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. -            return 1; -        } -          return this->GetStackParameters().disable_count;      }      void DisableDispatch() { -        if (IsDispatchTrackingDisabled()) { -            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. -            return; -        } -          ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() >= 0);          this->GetStackParameters().disable_count++;      }      void EnableDispatch() { -        if (IsDispatchTrackingDisabled()) { -            // TODO(bunnei): Until kernel threads are emulated, we cannot enable/disable dispatch. -            return; -        } -          ASSERT(GetCurrentThread(kernel).GetDisableDispatchCount() > 0);          this->GetStackParameters().disable_count--;      } @@ -731,7 +708,6 @@ private:      // For emulation      std::shared_ptr<Common::Fiber> host_context{}; -    bool is_single_core{};      // For debugging      std::vector<KSynchronizationObject*> wait_objects_for_debugging; @@ -776,16 +752,4 @@ public:      }  }; -class KScopedDisableDispatch { -public: -    [[nodiscard]] explicit KScopedDisableDispatch(KernelCore& kernel_) : kernel{kernel_} { -        GetCurrentThread(kernel).DisableDispatch(); -    } - -    ~KScopedDisableDispatch(); - -private: -    KernelCore& kernel; -}; -  } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 8673384ee..bea945301 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -85,9 +85,8 @@ struct KernelCore::Impl {      }      void InitializeCores() { -        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { -            cores[core_id].Initialize(current_process->Is64BitProcess()); -            system.Memory().SetCurrentPageTable(*current_process, core_id); +        for (auto& core : cores) { +            core.Initialize(current_process->Is64BitProcess());          }      } @@ -132,6 +131,15 @@ struct KernelCore::Impl {          next_user_process_id = KProcess::ProcessIDMin;          next_thread_id = 1; +        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { +            if (suspend_threads[core_id]) { +                suspend_threads[core_id]->Close(); +                suspend_threads[core_id] = nullptr; +            } + +            schedulers[core_id].reset(); +        } +          cores.clear();          global_handle_table->Finalize(); @@ -159,16 +167,6 @@ struct KernelCore::Impl {          CleanupObject(time_shared_mem);          CleanupObject(system_resource_limit); -        for (u32 core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) { -            if (suspend_threads[core_id]) { -                suspend_threads[core_id]->Close(); -                suspend_threads[core_id] = nullptr; -            } - -            schedulers[core_id]->Finalize(); -            schedulers[core_id].reset(); -        } -          // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others          next_host_thread_id = Core::Hardware::NUM_CPU_CORES; @@ -259,22 +257,33 @@ struct KernelCore::Impl {      void MakeCurrentProcess(KProcess* process) {          current_process = process; +        if (process == nullptr) { +            return; +        } + +        const u32 core_id = GetCurrentHostThreadID(); +        if (core_id < Core::Hardware::NUM_CPU_CORES) { +            system.Memory().SetCurrentPageTable(*process, core_id); +        }      } -    /// Creates a new host thread ID, should only be called by GetHostThreadId -    u32 AllocateHostThreadId(std::optional<std::size_t> core_id) { -        if (core_id) { -            // The first for slots are reserved for CPU core threads -            ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES); -            return static_cast<u32>(*core_id); -        } else { -            return next_host_thread_id++; +    static inline thread_local u32 host_thread_id = UINT32_MAX; + +    /// Gets the host thread ID for the caller, allocating a new one if this is the first time +    u32 GetHostThreadId(std::size_t core_id) { +        if (host_thread_id == UINT32_MAX) { +            // The first four slots are reserved for CPU core threads +            ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +            host_thread_id = static_cast<u32>(core_id);          } +        return host_thread_id;      }      /// Gets the host thread ID for the caller, allocating a new one if this is the first time -    u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) { -        const thread_local auto host_thread_id{AllocateHostThreadId(core_id)}; +    u32 GetHostThreadId() { +        if (host_thread_id == UINT32_MAX) { +            host_thread_id = next_host_thread_id++; +        }          return host_thread_id;      } @@ -818,20 +827,16 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {      return impl->cores[id];  } -size_t KernelCore::CurrentPhysicalCoreIndex() const { -    const u32 core_id = impl->GetCurrentHostThreadID(); -    if (core_id >= Core::Hardware::NUM_CPU_CORES) { -        return Core::Hardware::NUM_CPU_CORES - 1; -    } -    return core_id; -} -  Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { -    return impl->cores[CurrentPhysicalCoreIndex()]; +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return impl->cores[core_id];  }  const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { -    return impl->cores[CurrentPhysicalCoreIndex()]; +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return impl->cores[core_id];  }  Kernel::KScheduler* KernelCore::CurrentScheduler() { @@ -1024,9 +1029,6 @@ void KernelCore::Suspend(bool in_suspention) {              impl->suspend_threads[core_id]->SetState(state);              impl->suspend_threads[core_id]->SetWaitReasonForDebugging(                  ThreadWaitReasonForDebugging::Suspended); -            if (!should_suspend) { -                impl->suspend_threads[core_id]->DisableDispatch(); -            }          }      }  } @@ -1041,11 +1043,13 @@ void KernelCore::ExceptionalExit() {  }  void KernelCore::EnterSVCProfile() { -    impl->svc_ticks[CurrentPhysicalCoreIndex()] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); +    std::size_t core = impl->GetCurrentHostThreadID(); +    impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));  }  void KernelCore::ExitSVCProfile() { -    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[CurrentPhysicalCoreIndex()]); +    std::size_t core = impl->GetCurrentHostThreadID(); +    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);  }  std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 57535433b..3a6db0b1c 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -146,9 +146,6 @@ public:      /// Gets the an instance of the respective physical CPU core.      const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; -    /// Gets the current physical core index for the running host thread. -    std::size_t CurrentPhysicalCoreIndex() const; -      /// Gets the sole instance of the Scheduler at the current running core.      Kernel::KScheduler* CurrentScheduler(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 890c52198..62fb06c45 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -877,7 +877,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, Handle              const u64 thread_ticks = current_thread->GetCpuTime();              out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); -        } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { +        } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {              out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;          } diff --git a/src/core/hle/service/ngct/ngct.cpp b/src/core/hle/service/ngct/ngct.cpp new file mode 100644 index 000000000..deb3abb28 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#include "common/string_util.h" +#include "core/core.h" +#include "core/hle/ipc_helpers.h" +#include "core/hle/service/ngct/ngct.h" +#include "core/hle/service/service.h" + +namespace Service::NGCT { + +class IService final : public ServiceFramework<IService> { +public: +    explicit IService(Core::System& system_) : ServiceFramework{system_, "ngct:u"} { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "Match"}, +            {1, &IService::Filter, "Filter"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } + +private: +    void Filter(Kernel::HLERequestContext& ctx) { +        const auto buffer = ctx.ReadBuffer(); +        const auto text = Common::StringFromFixedZeroTerminatedBuffer( +            reinterpret_cast<const char*>(buffer.data()), buffer.size()); + +        LOG_WARNING(Service_NGCT, "(STUBBED) called, text={}", text); + +        // Return the same string since we don't censor anything +        ctx.WriteBuffer(buffer); + +        IPC::ResponseBuilder rb{ctx, 2}; +        rb.Push(ResultSuccess); +    } +}; + +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) { +    std::make_shared<IService>(system)->InstallAsService(system.ServiceManager()); +} + +} // namespace Service::NGCT diff --git a/src/core/hle/service/ngct/ngct.h b/src/core/hle/service/ngct/ngct.h new file mode 100644 index 000000000..1f2a47b78 --- /dev/null +++ b/src/core/hle/service/ngct/ngct.h @@ -0,0 +1,20 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included + +#pragma once + +namespace Core { +class System; +} + +namespace Service::SM { +class ServiceManager; +} + +namespace Service::NGCT { + +/// Registers all NGCT services with the specified service manager. +void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system); + +} // namespace Service::NGCT diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp index 0a53c0c81..9decb9290 100644 --- a/src/core/hle/service/nifm/nifm.cpp +++ b/src/core/hle/service/nifm/nifm.cpp @@ -277,37 +277,45 @@ private:      void GetCurrentNetworkProfile(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_NIFM, "(STUBBED) called"); -        const SfNetworkProfileData network_profile_data{ -            .ip_setting_data{ -                .ip_address_setting{ -                    .is_automatic{true}, -                    .current_address{192, 168, 1, 100}, -                    .subnet_mask{255, 255, 255, 0}, -                    .gateway{192, 168, 1, 1}, -                }, -                .dns_setting{ -                    .is_automatic{true}, -                    .primary_dns{1, 1, 1, 1}, -                    .secondary_dns{1, 0, 0, 1}, +        const auto net_iface = Network::GetSelectedNetworkInterface(); + +        const SfNetworkProfileData network_profile_data = [&net_iface] { +            if (!net_iface) { +                return SfNetworkProfileData{}; +            } + +            return SfNetworkProfileData{ +                .ip_setting_data{ +                    .ip_address_setting{ +                        .is_automatic{true}, +                        .current_address{Network::TranslateIPv4(net_iface->ip_address)}, +                        .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, +                        .gateway{Network::TranslateIPv4(net_iface->gateway)}, +                    }, +                    .dns_setting{ +                        .is_automatic{true}, +                        .primary_dns{1, 1, 1, 1}, +                        .secondary_dns{1, 0, 0, 1}, +                    }, +                    .proxy_setting{ +                        .enabled{false}, +                        .port{}, +                        .proxy_server{}, +                        .automatic_auth_enabled{}, +                        .user{}, +                        .password{}, +                    }, +                    .mtu{1500},                  }, -                .proxy_setting{ -                    .enabled{false}, -                    .port{}, -                    .proxy_server{}, -                    .automatic_auth_enabled{}, -                    .user{}, -                    .password{}, +                .uuid{0xdeadbeef, 0xdeadbeef}, +                .network_name{"yuzu Network"}, +                .wireless_setting_data{ +                    .ssid_length{12}, +                    .ssid{"yuzu Network"}, +                    .passphrase{"yuzupassword"},                  }, -                .mtu{1500}, -            }, -            .uuid{0xdeadbeef, 0xdeadbeef}, -            .network_name{"yuzu Network"}, -            .wireless_setting_data{ -                .ssid_length{12}, -                .ssid{"yuzu Network"}, -                .passphrase{"yuzupassword"}, -            }, -        }; +            }; +        }();          ctx.WriteBuffer(network_profile_data); @@ -352,38 +360,33 @@ private:          LOG_WARNING(Service_NIFM, "(STUBBED) called");          struct IpConfigInfo { -            IpAddressSetting ip_address_setting; -            DnsSetting dns_setting; +            IpAddressSetting ip_address_setting{}; +            DnsSetting dns_setting{};          };          static_assert(sizeof(IpConfigInfo) == sizeof(IpAddressSetting) + sizeof(DnsSetting),                        "IpConfigInfo has incorrect size."); -        IpConfigInfo ip_config_info{ -            .ip_address_setting{ -                .is_automatic{true}, -                .current_address{0, 0, 0, 0}, -                .subnet_mask{255, 255, 255, 0}, -                .gateway{192, 168, 1, 1}, -            }, -            .dns_setting{ -                .is_automatic{true}, -                .primary_dns{1, 1, 1, 1}, -                .secondary_dns{1, 0, 0, 1}, -            }, -        }; +        const auto net_iface = Network::GetSelectedNetworkInterface(); -        const auto iface = Network::GetSelectedNetworkInterface(); -        if (iface) { -            ip_config_info.ip_address_setting = -                IpAddressSetting{.is_automatic{true}, -                                 .current_address{Network::TranslateIPv4(iface->ip_address)}, -                                 .subnet_mask{Network::TranslateIPv4(iface->subnet_mask)}, -                                 .gateway{Network::TranslateIPv4(iface->gateway)}}; +        const IpConfigInfo ip_config_info = [&net_iface] { +            if (!net_iface) { +                return IpConfigInfo{}; +            } -        } else { -            LOG_ERROR(Service_NIFM, -                      "Couldn't get host network configuration info, using default values"); -        } +            return IpConfigInfo{ +                .ip_address_setting{ +                    .is_automatic{true}, +                    .current_address{Network::TranslateIPv4(net_iface->ip_address)}, +                    .subnet_mask{Network::TranslateIPv4(net_iface->subnet_mask)}, +                    .gateway{Network::TranslateIPv4(net_iface->gateway)}, +                }, +                .dns_setting{ +                    .is_automatic{true}, +                    .primary_dns{1, 1, 1, 1}, +                    .secondary_dns{1, 0, 0, 1}, +                }, +            }; +        }();          IPC::ResponseBuilder rb{ctx, 2 + (sizeof(IpConfigInfo) + 3) / sizeof(u32)};          rb.Push(ResultSuccess); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index ce6065db2..a33e47d0b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}  void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,                          u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,                          const Common::Rectangle<int>& crop_rect) { -    VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); +    const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);      LOG_TRACE(Service,                "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",                addr, offset, width, height, stride, format); -    using PixelFormat = Tegra::FramebufferConfig::PixelFormat; -    const Tegra::FramebufferConfig framebuffer{ -        addr,      offset,   width, height, stride, static_cast<PixelFormat>(format), -        transform, crop_rect}; +    const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); +    const Tegra::FramebufferConfig framebuffer{addr,   offset,       width,     height, +                                               stride, pixel_format, transform, crop_rect};      system.GetPerfStats().EndSystemFrame();      system.GPU().SwapBuffers(&framebuffer); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 759247eb0..78de3f354 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -42,7 +42,9 @@ struct IGBPBuffer {      u32_le index;      INSERT_PADDING_WORDS(3);      u32_le gpu_buffer_id; -    INSERT_PADDING_WORDS(17); +    INSERT_PADDING_WORDS(6); +    u32_le external_format; +    INSERT_PADDING_WORDS(10);      u32_le nvmap_handle;      u32_le offset;      INSERT_PADDING_WORDS(60); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 00bff8caf..3ead813b0 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -298,7 +298,7 @@ void NVFlinger::Compose() {          auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");          ASSERT(nvdisp); -        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, +        nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format,                       igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,                       buffer->get().transform, buffer->get().crop_rect); diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index b3e50433b..065133166 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -46,6 +46,7 @@  #include "core/hle/service/ncm/ncm.h"  #include "core/hle/service/nfc/nfc.h"  #include "core/hle/service/nfp/nfp.h" +#include "core/hle/service/ngct/ngct.h"  #include "core/hle/service/nifm/nifm.h"  #include "core/hle/service/nim/nim.h"  #include "core/hle/service/npns/npns.h" @@ -271,6 +272,7 @@ Services::Services(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system      NCM::InstallInterfaces(*sm, system);      NFC::InstallInterfaces(*sm, system);      NFP::InstallInterfaces(*sm, system); +    NGCT::InstallInterfaces(*sm, system);      NIFM::InstallInterfaces(*sm, system);      NIM::InstallInterfaces(*sm, system);      NPNS::InstallInterfaces(*sm, system); diff --git a/src/core/network/network_interface.cpp b/src/core/network/network_interface.cpp index cecc9aa11..6811f21b1 100644 --- a/src/core/network/network_interface.cpp +++ b/src/core/network/network_interface.cpp @@ -37,73 +37,73 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {              AF_INET, GAA_FLAG_SKIP_MULTICAST | GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_INCLUDE_GATEWAYS,              nullptr, adapter_addresses.data(), &buf_size); -        if (ret == ERROR_BUFFER_OVERFLOW) { -            adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1); -        } else { +        if (ret != ERROR_BUFFER_OVERFLOW) {              break;          } + +        adapter_addresses.resize((buf_size / sizeof(IP_ADAPTER_ADDRESSES)) + 1);      } -    if (ret == NO_ERROR) { -        std::vector<NetworkInterface> result; +    if (ret != NO_ERROR) { +        LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); +        return {}; +    } -        for (auto current_address = adapter_addresses.data(); current_address != nullptr; -             current_address = current_address->Next) { -            if (current_address->FirstUnicastAddress == nullptr || -                current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { -                continue; -            } +    std::vector<NetworkInterface> result; -            if (current_address->OperStatus != IfOperStatusUp) { -                continue; -            } +    for (auto current_address = adapter_addresses.data(); current_address != nullptr; +         current_address = current_address->Next) { +        if (current_address->FirstUnicastAddress == nullptr || +            current_address->FirstUnicastAddress->Address.lpSockaddr == nullptr) { +            continue; +        } -            const auto ip_addr = Common::BitCast<struct sockaddr_in>( -                                     *current_address->FirstUnicastAddress->Address.lpSockaddr) -                                     .sin_addr; +        if (current_address->OperStatus != IfOperStatusUp) { +            continue; +        } -            ULONG mask = 0; -            if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, -                                        &mask) != NO_ERROR) { -                LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); -                continue; -            } +        const auto ip_addr = Common::BitCast<struct sockaddr_in>( +                                 *current_address->FirstUnicastAddress->Address.lpSockaddr) +                                 .sin_addr; -            struct in_addr gateway = {.S_un{.S_addr{0}}}; -            if (current_address->FirstGatewayAddress != nullptr && -                current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { -                gateway = Common::BitCast<struct sockaddr_in>( -                              *current_address->FirstGatewayAddress->Address.lpSockaddr) -                              .sin_addr; -            } +        ULONG mask = 0; +        if (ConvertLengthToIpv4Mask(current_address->FirstUnicastAddress->OnLinkPrefixLength, +                                    &mask) != NO_ERROR) { +            LOG_ERROR(Network, "Failed to convert IPv4 prefix length to subnet mask"); +            continue; +        } -            result.push_back(NetworkInterface{ -                .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, -                .ip_address{ip_addr}, -                .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, -                .gateway = gateway}); +        struct in_addr gateway = {.S_un{.S_addr{0}}}; +        if (current_address->FirstGatewayAddress != nullptr && +            current_address->FirstGatewayAddress->Address.lpSockaddr != nullptr) { +            gateway = Common::BitCast<struct sockaddr_in>( +                          *current_address->FirstGatewayAddress->Address.lpSockaddr) +                          .sin_addr;          } -        return result; -    } else { -        LOG_ERROR(Network, "Failed to get network interfaces with GetAdaptersAddresses"); -        return {}; +        result.emplace_back(NetworkInterface{ +            .name{Common::UTF16ToUTF8(std::wstring{current_address->FriendlyName})}, +            .ip_address{ip_addr}, +            .subnet_mask = in_addr{.S_un{.S_addr{mask}}}, +            .gateway = gateway});      } + +    return result;  }  #else  std::vector<NetworkInterface> GetAvailableNetworkInterfaces() { -    std::vector<NetworkInterface> result; -      struct ifaddrs* ifaddr = nullptr;      if (getifaddrs(&ifaddr) != 0) {          LOG_ERROR(Network, "Failed to get network interfaces with getifaddrs: {}",                    std::strerror(errno)); -        return result; +        return {};      } +    std::vector<NetworkInterface> result; +      for (auto ifa = ifaddr; ifa != nullptr; ifa = ifa->ifa_next) {          if (ifa->ifa_addr == nullptr || ifa->ifa_netmask == nullptr) {              continue; @@ -117,55 +117,62 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {              continue;          } -        std::uint32_t gateway{0}; +        u32 gateway{}; +          std::ifstream file{"/proc/net/route"}; -        if (file.is_open()) { +        if (!file.is_open()) { +            LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); -            // ignore header -            file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); +            result.emplace_back(NetworkInterface{ +                .name{ifa->ifa_name}, +                .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr}, +                .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, +                .gateway{in_addr{.s_addr = gateway}}}); +            continue; +        } -            bool gateway_found = false; +        // ignore header +        file.ignore(std::numeric_limits<std::streamsize>::max(), '\n'); -            for (std::string line; std::getline(file, line);) { -                std::istringstream iss{line}; +        bool gateway_found = false; -                std::string iface_name{}; -                iss >> iface_name; -                if (iface_name != ifa->ifa_name) { -                    continue; -                } +        for (std::string line; std::getline(file, line);) { +            std::istringstream iss{line}; -                iss >> std::hex; +            std::string iface_name; +            iss >> iface_name; +            if (iface_name != ifa->ifa_name) { +                continue; +            } -                std::uint32_t dest{0}; -                iss >> dest; -                if (dest != 0) { -                    // not the default route -                    continue; -                } +            iss >> std::hex; -                iss >> gateway; +            u32 dest{}; +            iss >> dest; +            if (dest != 0) { +                // not the default route +                continue; +            } -                std::uint16_t flags{0}; -                iss >> flags; +            iss >> gateway; -                // flag RTF_GATEWAY (defined in <linux/route.h>) -                if ((flags & 0x2) == 0) { -                    continue; -                } +            u16 flags{}; +            iss >> flags; -                gateway_found = true; -                break; +            // flag RTF_GATEWAY (defined in <linux/route.h>) +            if ((flags & 0x2) == 0) { +                continue;              } -            if (!gateway_found) { -                gateway = 0; -            } -        } else { -            LOG_ERROR(Network, "Failed to open \"/proc/net/route\""); +            gateway_found = true; +            break;          } -        result.push_back(NetworkInterface{ +        if (!gateway_found) { +            gateway = 0; +        } + +        result.emplace_back(NetworkInterface{              .name{ifa->ifa_name},              .ip_address{Common::BitCast<struct sockaddr_in>(*ifa->ifa_addr).sin_addr},              .subnet_mask{Common::BitCast<struct sockaddr_in>(*ifa->ifa_netmask).sin_addr}, @@ -180,11 +187,11 @@ std::vector<NetworkInterface> GetAvailableNetworkInterfaces() {  #endif  std::optional<NetworkInterface> GetSelectedNetworkInterface() { -    const std::string& selected_network_interface = Settings::values.network_interface.GetValue(); +    const auto& selected_network_interface = Settings::values.network_interface.GetValue();      const auto network_interfaces = Network::GetAvailableNetworkInterfaces();      if (network_interfaces.size() == 0) {          LOG_ERROR(Network, "GetAvailableNetworkInterfaces returned no interfaces"); -        return {}; +        return std::nullopt;      }      const auto res = @@ -192,12 +199,12 @@ std::optional<NetworkInterface> GetSelectedNetworkInterface() {              return iface.name == selected_network_interface;          }); -    if (res != network_interfaces.end()) { -        return *res; -    } else { +    if (res == network_interfaces.end()) {          LOG_ERROR(Network, "Couldn't find selected interface \"{}\"", selected_network_interface); -        return {}; +        return std::nullopt;      } + +    return *res;  }  } // namespace Network diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,8 @@  namespace Shader::Backend::SPIRV {  namespace { +constexpr size_t NUM_FIXEDFNCTEXTURE = 10; +  enum class Operation {      Increment,      Decrement, @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {          return pointer_type;      }  } + +size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, +                              size_t start_offset) { +    for (size_t location = start_offset; location < used_locations.size(); ++location) { +        if (!used_locations.test(location)) { +            return location; +        } +    } +    throw RuntimeError("Unable to get an unused location for legacy attribute"); +}  } // Anonymous namespace  void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {          loads[IR::Attribute::TessellationEvaluationPointV]) {          tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);      } +    std::bitset<IR::NUM_GENERICS> used_locations{};      for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {          const AttributeType input_type{runtime_info.generic_input_types[index]};          if (!runtime_info.previous_stage_stores.Generic(index)) { @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {          if (input_type == AttributeType::Disabled) {              continue;          } +        used_locations.set(index);          const Id type{GetAttributeType(*this, input_type)};          const Id id{DefineInput(*this, type, true)};          Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {              break;          }      } +    size_t previous_unused_location = 0; +    if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { +        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); +        previous_unused_location = location; +        used_locations.set(location); +        const Id id{DefineInput(*this, F32[4], true)}; +        Decorate(id, spv::Decoration::Location, location); +        input_front_color = id; +    } +    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { +        if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { +            const size_t location = +                FindNextUnusedLocation(used_locations, previous_unused_location); +            previous_unused_location = location; +            used_locations.set(location); +            const Id id{DefineInput(*this, F32[4], true)}; +            Decorate(id, spv::Decoration::Location, location); +            input_fixed_fnc_textures[index] = id; +        } +    }      if (stage == Stage::TessellationEval) {          for (size_t index = 0; index < info.uses_patches.size(); ++index) {              if (!info.uses_patches[index]) { @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {          viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,                                       spv::BuiltIn::ViewportMaskNV);      } +    std::bitset<IR::NUM_GENERICS> used_locations{};      for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {          if (info.stores.Generic(index)) {              DefineGenericOutput(*this, index, invocations); +            used_locations.set(index); +        } +    } +    size_t previous_unused_location = 0; +    if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { +        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); +        previous_unused_location = location; +        used_locations.set(location); +        const Id id{DefineOutput(*this, F32[4], invocations)}; +        Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); +        output_front_color = id; +    } +    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { +        if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { +            const size_t location = +                FindNextUnusedLocation(used_locations, previous_unused_location); +            previous_unused_location = location; +            used_locations.set(location); +            const Id id{DefineOutput(*this, F32[4], invocations)}; +            Decorate(id, spv::Decoration::Location, location); +            output_fixed_fnc_textures[index] = id;          }      }      switch (stage) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -268,10 +268,14 @@ public:      Id write_global_func_u32x4{};      Id input_position{}; +    Id input_front_color{}; +    std::array<Id, 10> input_fixed_fnc_textures{};      std::array<Id, 32> input_generics{};      Id output_point_size{};      Id output_position{}; +    Id output_front_color{}; +    std::array<Id, 10> output_fixed_fnc_textures{};      std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};      Id output_tess_level_outer{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 14c77f162..68f360b3c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...      }  } +bool IsFixedFncTexture(IR::Attribute attribute) { +    return attribute >= IR::Attribute::FixedFncTexture0S && +           attribute <= IR::Attribute::FixedFncTexture9Q; +} + +u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { +    if (!IsFixedFncTexture(attribute)) { +        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); +    } +    return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; +} + +u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { +    if (!IsFixedFncTexture(attribute)) { +        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); +    } +    return static_cast<u32>(attribute) % 4u; +} +  template <typename... Args>  Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {      if (ctx.stage == Stage::TessellationControl) { @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {              return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);          }      } +    if (IsFixedFncTexture(attr)) { +        const u32 index{FixedFncTextureAttributeIndex(attr)}; +        const u32 element{FixedFncTextureAttributeElement(attr)}; +        const Id element_id{ctx.Const(element)}; +        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], +                                 element_id); +    }      switch (attr) {      case IR::Attribute::PointSize:          return ctx.output_point_size; @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {          const Id element_id{ctx.Const(element)};          return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);      } +    case IR::Attribute::ColorFrontDiffuseR: +    case IR::Attribute::ColorFrontDiffuseG: +    case IR::Attribute::ColorFrontDiffuseB: +    case IR::Attribute::ColorFrontDiffuseA: { +        const u32 element{static_cast<u32>(attr) % 4}; +        const Id element_id{ctx.Const(element)}; +        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); +    }      case IR::Attribute::ClipDistance0:      case IR::Attribute::ClipDistance1:      case IR::Attribute::ClipDistance2: @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {          const Id value{ctx.OpLoad(type->id, pointer)};          return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;      } +    if (IsFixedFncTexture(attr)) { +        const u32 index{FixedFncTextureAttributeIndex(attr)}; +        const Id attr_id{ctx.input_fixed_fnc_textures[index]}; +        const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; +        return ctx.OpLoad(ctx.F32[1], attr_ptr); +    }      switch (attr) {      case IR::Attribute::PrimitiveId:          return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {      case IR::Attribute::PositionW:          return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,                                                    ctx.Const(element))); +    case IR::Attribute::ColorFrontDiffuseR: +    case IR::Attribute::ColorFrontDiffuseG: +    case IR::Attribute::ColorFrontDiffuseB: +    case IR::Attribute::ColorFrontDiffuseA: { +        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, +                                                  ctx.Const(element))); +    }      case IR::Attribute::InstanceId:          if (ctx.profile.support_vertex_instance_id) {              return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); @@ -333,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {              return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));          }      case IR::Attribute::FrontFace: -        return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), -                            ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); +        return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), +                            ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), +                            ctx.f32_zero_value);      case IR::Attribute::PointSpriteS:          return ctx.OpLoad(ctx.F32[1],                            ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -20,6 +20,7 @@  #include "shader_recompiler/frontend/maxwell/decode.h"  #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"  #include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/host_translate_info.h"  #include "shader_recompiler/object_pool.h"  namespace Shader::Maxwell { @@ -652,7 +653,7 @@ class TranslatePass {  public:      TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,                    ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, -                  IR::AbstractSyntaxList& syntax_list_) +                  IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)          : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},            syntax_list{syntax_list_} {          Visit(root_stmt, nullptr, nullptr); @@ -660,6 +661,9 @@ public:          IR::Block& first_block{*syntax_list.front().data.block};          IR::IREmitter ir(first_block, first_block.begin());          ir.Prologue(); +        if (uses_demote_to_helper && host_info.needs_demote_reorder) { +            DemoteCombinationPass(); +        }      }  private: @@ -809,7 +813,14 @@ private:              }              case StatementType::Return: {                  ensure_block(); -                IR::IREmitter{*current_block}.Epilogue(); +                IR::Block* return_block{block_pool.Create(inst_pool)}; +                IR::IREmitter{*return_block}.Epilogue(); +                current_block->AddBranch(return_block); + +                auto& merge{syntax_list.emplace_back()}; +                merge.type = IR::AbstractSyntaxNode::Type::Block; +                merge.data.block = return_block; +                  current_block = nullptr;                  syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;                  break; @@ -824,6 +835,7 @@ private:                  auto& merge{syntax_list.emplace_back()};                  merge.type = IR::AbstractSyntaxNode::Type::Block;                  merge.data.block = demote_block; +                uses_demote_to_helper = true;                  break;              }              case StatementType::Unreachable: { @@ -855,11 +867,117 @@ private:          return block_pool.Create(inst_pool);      } +    void DemoteCombinationPass() { +        using Type = IR::AbstractSyntaxNode::Type; +        std::vector<IR::Block*> demote_blocks; +        std::vector<IR::U1> demote_conds; +        u32 num_epilogues{}; +        u32 branch_depth{}; +        for (const IR::AbstractSyntaxNode& node : syntax_list) { +            if (node.type == Type::If) { +                ++branch_depth; +            } +            if (node.type == Type::EndIf) { +                --branch_depth; +            } +            if (node.type != Type::Block) { +                continue; +            } +            if (branch_depth > 1) { +                // Skip reordering nested demote branches. +                continue; +            } +            for (const IR::Inst& inst : node.data.block->Instructions()) { +                const IR::Opcode op{inst.GetOpcode()}; +                if (op == IR::Opcode::DemoteToHelperInvocation) { +                    demote_blocks.push_back(node.data.block); +                    break; +                } +                if (op == IR::Opcode::Epilogue) { +                    ++num_epilogues; +                } +            } +        } +        if (demote_blocks.size() == 0) { +            return; +        } +        if (num_epilogues > 1) { +            LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); +            return; +        } +        s64 last_iterator_offset{}; +        auto& asl{syntax_list}; +        for (const IR::Block* demote_block : demote_blocks) { +            const auto start_it{asl.begin() + last_iterator_offset}; +            auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::If && asn.data.if_node.body == demote_block; +            })}; +            if (asl_it == asl.end()) { +                // Demote without a conditional branch. +                // No need to proceed since all fragment instances will be demoted regardless. +                return; +            } +            const IR::Block* const end_if = asl_it->data.if_node.merge; +            demote_conds.push_back(asl_it->data.if_node.cond); +            last_iterator_offset = std::distance(asl.begin(), asl_it); + +            asl_it = asl.erase(asl_it); +            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::Block && asn.data.block == demote_block; +            }); + +            asl_it = asl.erase(asl_it); +            asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { +                return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; +            }); +            asl_it = asl.erase(asl_it); +        } +        const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { +            if (asn.type != Type::Block) { +                return false; +            } +            for (const auto& inst : asn.data.block->Instructions()) { +                if (inst.GetOpcode() == IR::Opcode::Epilogue) { +                    return true; +                } +            } +            return false; +        }}; +        const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; +        const auto return_block_it{(reverse_it + 1).base()}; + +        IR::IREmitter ir{*(return_block_it - 1)->data.block}; +        IR::U1 cond(IR::Value(false)); +        for (const auto& demote_cond : demote_conds) { +            cond = ir.LogicalOr(cond, demote_cond); +        } +        cond.Inst()->DestructiveAddUsage(1); + +        IR::AbstractSyntaxNode demote_if_node{}; +        demote_if_node.type = Type::If; +        demote_if_node.data.if_node.cond = cond; +        demote_if_node.data.if_node.body = demote_blocks[0]; +        demote_if_node.data.if_node.merge = return_block_it->data.block; + +        IR::AbstractSyntaxNode demote_node{}; +        demote_node.type = Type::Block; +        demote_node.data.block = demote_blocks[0]; + +        IR::AbstractSyntaxNode demote_endif_node{}; +        demote_endif_node.type = Type::EndIf; +        demote_endif_node.data.end_if.merge = return_block_it->data.block; + +        asl.insert(return_block_it, demote_endif_node); +        asl.insert(return_block_it, demote_node); +        asl.insert(return_block_it, demote_if_node); +    } +      ObjectPool<Statement>& stmt_pool;      ObjectPool<IR::Inst>& inst_pool;      ObjectPool<IR::Block>& block_pool;      Environment& env;      IR::AbstractSyntaxList& syntax_list; +    bool uses_demote_to_helper{};  // TODO: C++20 Remove this when all compilers support constexpr std::vector  #if __cpp_lib_constexpr_vector >= 201907 @@ -871,12 +989,13 @@ private:  } // Anonymous namespace  IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, -                                Environment& env, Flow::CFG& cfg) { +                                Environment& env, Flow::CFG& cfg, +                                const HostTranslateInfo& host_info) {      ObjectPool<Statement> stmt_pool{64};      GotoPass goto_pass{cfg, stmt_pool};      Statement& root{goto_pass.RootStatement()};      IR::AbstractSyntaxList syntax_list; -    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; +    TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};      return syntax_list;  } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,10 +11,13 @@  #include "shader_recompiler/frontend/maxwell/control_flow.h"  #include "shader_recompiler/object_pool.h" -namespace Shader::Maxwell { +namespace Shader { +struct HostTranslateInfo; +namespace Maxwell {  [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,                                                ObjectPool<IR::Block>& block_pool, Environment& env, -                                              Flow::CFG& cfg); +                                              Flow::CFG& cfg, const HostTranslateInfo& host_info); -} // namespace Shader::Maxwell +} // namespace Maxwell +} // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {  IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,                               Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {      IR::Program program; -    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); +    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);      program.blocks = GenerateBlocks(program.syntax_list);      program.post_order_blocks = PostOrder(program.syntax_list.front());      program.stage = env.ShaderStage(); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h @@ -11,8 +11,9 @@ namespace Shader {  /// Misc information about the host  struct HostTranslateInfo { -    bool support_float16{}; ///< True when the device supports 16-bit floats -    bool support_int64{};   ///< True when the device supports 64-bit integers +    bool support_float16{};      ///< True when the device supports 16-bit floats +    bool support_int64{};        ///< True when the device supports 64-bit integers +    bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered  };  } // namespace Shader diff --git a/src/tests/common/param_package.cpp b/src/tests/common/param_package.cpp index 4c0f9654f..e31ca3544 100644 --- a/src/tests/common/param_package.cpp +++ b/src/tests/common/param_package.cpp @@ -4,11 +4,13 @@  #include <catch2/catch.hpp>  #include <math.h> +#include "common/logging/backend.h"  #include "common/param_package.h"  namespace Common {  TEST_CASE("ParamPackage", "[common]") { +    Common::Log::DisableLoggingInTests();      ParamPackage original{          {"abc", "xyz"},          {"def", "42"}, diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public:          stream_score += score;      } -    /// Sets the new frame tick -    void SetFrameTick(u64 new_frame_tick) noexcept { -        frame_tick = new_frame_tick; -    } - -    /// Returns the new frame tick -    [[nodiscard]] u64 FrameTick() const noexcept { -        return frame_tick; -    } -      /// Returns the likeliness of this being a stream buffer      [[nodiscard]] int StreamScore() const noexcept {          return stream_score; @@ -307,6 +297,14 @@ public:          return words.size_bytes;      } +    size_t getLRUID() const noexcept { +        return lru_id; +    } + +    void setLRUID(size_t lru_id_) { +        lru_id = lru_id_; +    } +  private:      template <Type type>      u64* Array() noexcept { @@ -603,9 +601,9 @@ private:      RasterizerInterface* rasterizer = nullptr;      VAddr cpu_addr = 0;      Words words; -    u64 frame_tick = 0;      BufferFlagBits flags{};      int stream_score = 0; +    size_t lru_id = SIZE_MAX;  };  } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@  #include "common/common_types.h"  #include "common/div_ceil.h"  #include "common/literals.h" +#include "common/lru_cache.h"  #include "common/microprofile.h"  #include "common/scope_exit.h"  #include "common/settings.h" @@ -330,7 +331,7 @@ private:      template <bool insert>      void ChangeRegister(BufferId buffer_id); -    void TouchBuffer(Buffer& buffer) const noexcept; +    void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;      bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -428,7 +429,11 @@ private:      size_t immediate_buffer_capacity = 0;      std::unique_ptr<u8[]> immediate_buffer_alloc; -    typename SlotVector<Buffer>::Iterator deletion_iterator; +    struct LRUItemParams { +        using ObjectType = BufferId; +        using TickType = u64; +    }; +    Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;      u64 frame_tick = 0;      u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,        kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {      // Ensure the first slot is used for the null buffer      void(slot_buffers.insert(runtime, NullBufferParams{})); -    deletion_iterator = slot_buffers.end();      common_ranges.clear();  } @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {      const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;      const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;      int num_iterations = aggressive_gc ? 64 : 32; -    for (; num_iterations > 0; --num_iterations) { -        if (deletion_iterator == slot_buffers.end()) { -            deletion_iterator = slot_buffers.begin(); -        } -        ++deletion_iterator; -        if (deletion_iterator == slot_buffers.end()) { -            break; -        } -        const auto [buffer_id, buffer] = *deletion_iterator; -        if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { -            DownloadBufferMemory(*buffer); -            DeleteBuffer(buffer_id); +    const auto clean_up = [this, &num_iterations](BufferId buffer_id) { +        if (num_iterations == 0) { +            return true;          } -    } +        --num_iterations; +        auto& buffer = slot_buffers[buffer_id]; +        DownloadBufferMemory(buffer); +        DeleteBuffer(buffer_id); +        return false; +    }; +    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);  }  template <class P> @@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() {      const bool skip_preferred = hits * 256 < shots * 251;      uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; -    if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { +    if (total_used_memory >= EXPECTED_MEMORY) {          RunGarbageCollector();      }      ++frame_tick; @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {  template <class P>  void BufferCache<P>::BindHostIndexBuffer() {      Buffer& buffer = slot_buffers[index_buffer.buffer_id]; -    TouchBuffer(buffer); +    TouchBuffer(buffer, index_buffer.buffer_id);      const u32 offset = buffer.Offset(index_buffer.cpu_addr);      const u32 size = index_buffer.size;      SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {      for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {          const Binding& binding = vertex_buffers[index];          Buffer& buffer = slot_buffers[binding.buffer_id]; -        TouchBuffer(buffer); +        TouchBuffer(buffer, binding.buffer_id);          SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);          if (!flags[Dirty::VertexBuffer0 + index]) {              continue; @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32      const VAddr cpu_addr = binding.cpu_addr;      const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);      Buffer& buffer = slot_buffers[binding.buffer_id]; -    TouchBuffer(buffer); +    TouchBuffer(buffer, binding.buffer_id);      const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&                                   size <= uniform_buffer_skip_cache_size &&                                   !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {      ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {          const Binding& binding = storage_buffers[stage][index];          Buffer& buffer = slot_buffers[binding.buffer_id]; -        TouchBuffer(buffer); +        TouchBuffer(buffer, binding.buffer_id);          const u32 size = binding.size;          SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {      for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {          const Binding& binding = transform_feedback_buffers[index];          Buffer& buffer = slot_buffers[binding.buffer_id]; -        TouchBuffer(buffer); +        TouchBuffer(buffer, binding.buffer_id);          const u32 size = binding.size;          SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {      ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {          const Binding& binding = compute_uniform_buffers[index];          Buffer& buffer = slot_buffers[binding.buffer_id]; -        TouchBuffer(buffer); +        TouchBuffer(buffer, binding.buffer_id);          const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);          SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {      ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {          const Binding& binding = compute_storage_buffers[index];          Buffer& buffer = slot_buffers[binding.buffer_id]; -        TouchBuffer(buffer); +        TouchBuffer(buffer, binding.buffer_id);          const u32 size = binding.size;          SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {      const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);      const u32 size = static_cast<u32>(overlap.end - overlap.begin);      const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); -    TouchBuffer(slot_buffers[new_buffer_id]);      for (const BufferId overlap_id : overlap.ids) {          JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);      }      Register(new_buffer_id); +    TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);      return new_buffer_id;  } @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {  template <class P>  template <bool insert>  void BufferCache<P>::ChangeRegister(BufferId buffer_id) { -    const Buffer& buffer = slot_buffers[buffer_id]; +    Buffer& buffer = slot_buffers[buffer_id];      const auto size = buffer.SizeBytes();      if (insert) {          total_used_memory += Common::AlignUp(size, 1024); +        buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));      } else {          total_used_memory -= Common::AlignUp(size, 1024); +        lru_cache.Free(buffer.getLRUID());      }      const VAddr cpu_addr_begin = buffer.CpuAddr();      const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {  }  template <class P> -void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { -    buffer.SetFrameTick(frame_tick); +void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { +    if (buffer_id != NULL_BUFFER_ID) { +        lru_cache.Touch(buffer.getLRUID(), frame_tick); +    }  }  template <class P> diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 70030066a..d7e749485 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() {      uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);      uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); +    ASSERT(!current_frame_info.segment_enabled);      uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).      const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 87eafdb03..3b1ed4b3a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -22,7 +22,7 @@ struct Vp9FrameDimensions {  };  static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); -enum FrameFlags : u32 { +enum class FrameFlags : u32 {      IsKeyFrame = 1 << 0,      LastFrameIsKeyFrame = 1 << 1,      FrameSizeChanged = 1 << 2, @@ -30,6 +30,7 @@ enum FrameFlags : u32 {      LastShowFrame = 1 << 4,      IntraOnly = 1 << 5,  }; +DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)  enum class TxSize {      Tx4x4 = 0,   // 4x4 transform @@ -92,44 +93,34 @@ struct Vp9EntropyProbs {  static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");  struct Vp9PictureInfo { -    bool is_key_frame; -    bool intra_only; -    bool last_frame_was_key; -    bool frame_size_changed; -    bool error_resilient_mode; -    bool last_frame_shown; -    bool show_frame; +    u32 bitstream_size; +    std::array<u64, 4> frame_offsets;      std::array<s8, 4> ref_frame_sign_bias;      s32 base_q_index;      s32 y_dc_delta_q;      s32 uv_dc_delta_q;      s32 uv_ac_delta_q; -    bool lossless;      s32 transform_mode; -    bool allow_high_precision_mv;      s32 interp_filter;      s32 reference_mode; -    s8 comp_fixed_ref; -    std::array<s8, 2> comp_var_ref;      s32 log2_tile_cols;      s32 log2_tile_rows; -    bool segment_enabled; -    bool segment_map_update; -    bool segment_map_temporal_update; -    s32 segment_abs_delta; -    std::array<u32, 8> segment_feature_enable; -    std::array<std::array<s16, 4>, 8> segment_feature_data; -    bool mode_ref_delta_enabled; -    bool use_prev_in_find_mv_refs;      std::array<s8, 4> ref_deltas;      std::array<s8, 2> mode_deltas;      Vp9EntropyProbs entropy;      Vp9FrameDimensions frame_size;      u8 first_level;      u8 sharpness_level; -    u32 bitstream_size; -    std::array<u64, 4> frame_offsets; -    std::array<bool, 4> refresh_frame; +    bool is_key_frame; +    bool intra_only; +    bool last_frame_was_key; +    bool error_resilient_mode; +    bool last_frame_shown; +    bool show_frame; +    bool lossless; +    bool allow_high_precision_mv; +    bool segment_enabled; +    bool mode_ref_delta_enabled;  };  struct Vp9FrameContainer { @@ -145,7 +136,7 @@ struct PictureInfo {      Vp9FrameDimensions golden_frame_size;  ///< 0x50      Vp9FrameDimensions alt_frame_size;     ///< 0x58      Vp9FrameDimensions current_frame_size; ///< 0x60 -    u32 vp9_flags;                         ///< 0x68 +    FrameFlags vp9_flags;                  ///< 0x68      std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C      u8 first_level;                        ///< 0x70      u8 sharpness_level;                    ///< 0x71 @@ -158,60 +149,43 @@ struct PictureInfo {      u8 allow_high_precision_mv;            ///< 0x78      u8 interp_filter;                      ///< 0x79      u8 reference_mode;                     ///< 0x7A -    s8 comp_fixed_ref;                     ///< 0x7B -    std::array<s8, 2> comp_var_ref;        ///< 0x7C +    INSERT_PADDING_BYTES_NOINIT(3);        ///< 0x7B      u8 log2_tile_cols;                     ///< 0x7E      u8 log2_tile_rows;                     ///< 0x7F      Segmentation segmentation;             ///< 0x80      LoopFilter loop_filter;                ///< 0xE4 -    INSERT_PADDING_BYTES_NOINIT(5);        ///< 0xEB -    u32 surface_params;                    ///< 0xF0 -    INSERT_PADDING_WORDS_NOINIT(3);        ///< 0xF4 +    INSERT_PADDING_BYTES_NOINIT(21);       ///< 0xEB      [[nodiscard]] Vp9PictureInfo Convert() const {          return { -            .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, -            .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, -            .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, -            .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, -            .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, -            .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, -            .show_frame = true, +            .bitstream_size = bitstream_size, +            .frame_offsets{},              .ref_frame_sign_bias = ref_frame_sign_bias,              .base_q_index = base_q_index,              .y_dc_delta_q = y_dc_delta_q,              .uv_dc_delta_q = uv_dc_delta_q,              .uv_ac_delta_q = uv_ac_delta_q, -            .lossless = lossless != 0,              .transform_mode = tx_mode, -            .allow_high_precision_mv = allow_high_precision_mv != 0,              .interp_filter = interp_filter,              .reference_mode = reference_mode, -            .comp_fixed_ref = comp_fixed_ref, -            .comp_var_ref = comp_var_ref,              .log2_tile_cols = log2_tile_cols,              .log2_tile_rows = log2_tile_rows, -            .segment_enabled = segmentation.enabled != 0, -            .segment_map_update = segmentation.update_map != 0, -            .segment_map_temporal_update = segmentation.temporal_update != 0, -            .segment_abs_delta = segmentation.abs_delta, -            .segment_feature_enable = segmentation.feature_mask, -            .segment_feature_data = segmentation.feature_data, -            .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, -            .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && -                                        !(vp9_flags == (FrameFlags::FrameSizeChanged)) && -                                        !(vp9_flags == (FrameFlags::IntraOnly)) && -                                        (vp9_flags == (FrameFlags::LastShowFrame)) && -                                        !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)),              .ref_deltas = loop_filter.ref_deltas,              .mode_deltas = loop_filter.mode_deltas,              .entropy{},              .frame_size = current_frame_size,              .first_level = first_level,              .sharpness_level = sharpness_level, -            .bitstream_size = bitstream_size, -            .frame_offsets{}, -            .refresh_frame{}, +            .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), +            .intra_only = True(vp9_flags & FrameFlags::IntraOnly), +            .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), +            .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), +            .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), +            .show_frame = true, +            .lossless = lossless != 0, +            .allow_high_precision_mv = allow_high_precision_mv != 0, +            .segment_enabled = segmentation.enabled != 0, +            .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,          };      }  }; @@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48);  ASSERT_POSITION(first_level, 0x70);  ASSERT_POSITION(segmentation, 0x80);  ASSERT_POSITION(loop_filter, 0xE4); -ASSERT_POSITION(surface_params, 0xF0);  #undef ASSERT_POSITION  #define ASSERT_POSITION(field_name, position)                                                      \ diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1aa43523a..7f4ca6282 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -475,10 +475,10 @@ public:                  // These values are used by Nouveau and some games.                  AddGL = 0x8006, -                SubtractGL = 0x8007, -                ReverseSubtractGL = 0x8008, -                MinGL = 0x800a, -                MaxGL = 0x800b +                MinGL = 0x8007, +                MaxGL = 0x8008, +                SubtractGL = 0x800a, +                ReverseSubtractGL = 0x800b              };              enum class Factor : u32 { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 882eff880..c60ed6453 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -463,6 +463,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(          ++page_index;          page_offset = 0;          remaining_size -= num_bytes; +        old_page_addr = page_addr;      }      split();      return result; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ee992aed4..de9e41659 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -156,6 +156,10 @@ public:          return shader_backend;      } +    bool IsAmd() const { +        return vendor_name == "ATI Technologies Inc."; +    } +  private:      static bool TestVariableAoffi();      static bool TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f4dda17e..b0e14182e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo        host_info{            .support_float16 = false,            .support_int64 = device.HasShaderInt64(), +          .needs_demote_reorder = device.IsAmd(),        } {      if (use_asynchronous_shaders) {          workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {          blit_screen.Recreate();      }      const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); -    scheduler.Flush(render_semaphore); +    const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); +    scheduler.Flush(render_semaphore, present_semaphore);      scheduler.WaitWorker();      swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 5c43b8acf..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,          const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;          const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); -        const size_t size_bytes = GetSizeInBytes(framebuffer);          // TODO(Rodrigo): Read this from HLE          constexpr u32 block_height_log2 = 4;          const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); +        const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel, +                                                           framebuffer.stride, framebuffer.height, +                                                           1, block_height_log2, 0)};          Tegra::Texture::UnswizzleTexture(              mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),              bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); @@ -356,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() {  void VKBlitScreen::CreateRenderPass() {      const VkAttachmentDescription color_attachment{          .flags = 0, -        .format = swapchain.GetImageFormat(), +        .format = swapchain.GetImageViewFormat(),          .samples = VK_SAMPLE_COUNT_1_BIT,          .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,          .storeOp = VK_ATTACHMENT_STORE_OP_STORE, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a37ca1fdf..31bfbcb06 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw          .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,          .unified_descriptor_binding = true,          .support_descriptor_aliasing = true, -        .support_int8 = true, +        .support_int8 = device.IsInt8Supported(),          .support_int16 = device.IsShaderInt16Supported(),          .support_int64 = device.IsShaderInt64Supported(),          .support_vertex_instance_id = false, @@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw      host_info = Shader::HostTranslateInfo{          .support_float16 = device.IsFloat16Supported(),          .support_int64 = device.IsShaderInt64Supported(), +        .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || +                                driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,      };  } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() {      worker_thread.join();  } -void VKScheduler::Flush(VkSemaphore semaphore) { -    SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { +    SubmitExecution(signal_semaphore, wait_semaphore);      AllocateNewContext();  } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {      const u64 presubmit_tick = CurrentTick(); -    SubmitExecution(semaphore); +    SubmitExecution(signal_semaphore, wait_semaphore);      WaitWorker();      Wait(presubmit_tick);      AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() {      });  } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {      EndPendingOperations();      InvalidateState();      const u64 signal_value = master_semaphore->NextTick(); -    Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { +    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {          cmdbuf.End(); - -        const u32 num_signal_semaphores = semaphore ? 2U : 1U; - -        const u64 wait_value = signal_value - 1; -        const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; -          const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + +        const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;          const std::array signal_values{signal_value, u64(0)}; -        const std::array signal_semaphores{timeline_semaphore, semaphore}; +        const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + +        const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; +        const std::array wait_values{signal_value - 1, u64(1)}; +        const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; +        static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ +            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, +        };          const VkTimelineSemaphoreSubmitInfoKHR timeline_si{              .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,              .pNext = nullptr, -            .waitSemaphoreValueCount = 1, -            .pWaitSemaphoreValues = &wait_value, +            .waitSemaphoreValueCount = num_wait_semaphores, +            .pWaitSemaphoreValues = wait_values.data(),              .signalSemaphoreValueCount = num_signal_semaphores,              .pSignalSemaphoreValues = signal_values.data(),          };          const VkSubmitInfo submit_info{              .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,              .pNext = &timeline_si, -            .waitSemaphoreCount = 1, -            .pWaitSemaphores = &timeline_semaphore, -            .pWaitDstStageMask = &wait_stage_mask, +            .waitSemaphoreCount = num_wait_semaphores, +            .pWaitSemaphores = wait_semaphores.data(), +            .pWaitDstStageMask = wait_stage_masks.data(),              .commandBufferCount = 1,              .pCommandBuffers = cmdbuf.address(),              .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public:      ~VKScheduler();      /// Sends the current execution context to the GPU. -    void Flush(VkSemaphore semaphore = nullptr); +    void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);      /// Sends the current execution context to the GPU and waits for it to complete. -    void Finish(VkSemaphore semaphore = nullptr); +    void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);      /// Waits for the worker thread to finish executing everything. After this function returns it's      /// safe to touch worker resources. @@ -191,7 +191,7 @@ private:      void AllocateWorkerCommandBuffer(); -    void SubmitExecution(VkSemaphore semaphore); +    void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);      void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d990eefba..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -20,16 +20,15 @@ namespace Vulkan {  namespace { -VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) {      if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {          VkSurfaceFormatKHR format;          format.format = VK_FORMAT_B8G8R8A8_UNORM;          format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;          return format;      } -    const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { -        const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; -        return format.format == request_format && +    const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) { +        return format.format == VK_FORMAT_B8G8R8A8_UNORM &&                 format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;      });      return found != formats.end() ? *found : formats[0]; @@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() {  }  void VKSwapchain::Present(VkSemaphore render_semaphore) { -    const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; -    const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};      const auto present_queue{device.GetPresentQueue()};      const VkPresentInfoKHR present_info{          .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,          .pNext = nullptr, -        .waitSemaphoreCount = render_semaphore ? 2U : 1U, -        .pWaitSemaphores = semaphores.data(), +        .waitSemaphoreCount = render_semaphore ? 1U : 0U, +        .pWaitSemaphores = &render_semaphore,          .swapchainCount = 1,          .pSwapchains = swapchain.address(),          .pImageIndices = &image_index, @@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,      const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};      const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; -    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; +    const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)};      const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};      u32 requested_image_count{capabilities.minImageCount + 1}; @@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,          swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());          swapchain_ci.pQueueFamilyIndices = queue_indices.data();      } +    static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB}; +    VkImageFormatListCreateInfo format_list{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR, +        .pNext = nullptr, +        .viewFormatCount = static_cast<u32>(view_formats.size()), +        .pViewFormats = view_formats.data(), +    }; +    if (device.IsKhrSwapchainMutableFormatEnabled()) { +        format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list); +        swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR; +    }      // Request the size again to reduce the possibility of a TOCTOU race condition.      const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);      swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,      images = swapchain.GetImages();      image_count = static_cast<u32>(images.size()); -    image_format = surface_format.format; +    image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;  }  void VKSwapchain::CreateSemaphores() { @@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() {          .flags = 0,          .image = {},          .viewType = VK_IMAGE_VIEW_TYPE_2D, -        .format = image_format, +        .format = image_view_format,          .components =              {                  .r = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 35c2cdc14..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -68,8 +68,12 @@ public:          return *image_views[index];      } -    VkFormat GetImageFormat() const { -        return image_format; +    VkFormat GetImageViewFormat() const { +        return image_view_format; +    } + +    VkSemaphore CurrentPresentSemaphore() const { +        return *present_semaphores[frame_index];      }  private: @@ -96,7 +100,7 @@ private:      u32 image_index{};      u32 frame_index{}; -    VkFormat image_format{}; +    VkFormat image_view_format{};      VkExtent2D extent{};      bool current_srgb{}; diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase {      VAddr cpu_addr_end = 0;      u64 modification_tick = 0; -    u64 frame_tick = 0; +    size_t lru_index = SIZE_MAX;      std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@  #pragma once  #include "common/alignment.h" -#include "common/settings.h"  #include "video_core/dirty_flags.h"  #include "video_core/texture_cache/samples_helper.h"  #include "video_core/texture_cache/texture_cache_base.h" @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&      void(slot_image_views.insert(runtime, NullImageParams{}));      void(slot_samplers.insert(runtime, sampler_descriptor)); -    deletion_iterator = slot_images.begin(); -      if constexpr (HAS_DEVICE_MEMORY_INFO) {          const auto device_memory = runtime.GetDeviceLocalMemory();          const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,70 +61,38 @@ template <class P>  void TextureCache<P>::RunGarbageCollector() {      const bool high_priority_mode = total_used_memory >= expected_memory;      const bool aggressive_mode = total_used_memory >= critical_memory; -    const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; -    int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); -    for (; num_iterations > 0; --num_iterations) { -        if (deletion_iterator == slot_images.end()) { -            deletion_iterator = slot_images.begin(); -            if (deletion_iterator == slot_images.end()) { -                break; -            } +    const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; +    size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); +    const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { +        if (num_iterations == 0) { +            return true;          } -        auto [image_id, image_tmp] = *deletion_iterator; -        Image* image = image_tmp; // fix clang error. -        const bool is_alias = True(image->flags & ImageFlagBits::Alias); -        const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); -        const bool must_download = image->IsSafeDownload(); -        bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); -        const u64 ticks_needed = -            is_bad_overlap -                ? ticks_to_destroy >> 4 -                : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); -        should_care |= aggressive_mode; -        if (should_care && image->frame_tick + ticks_needed < frame_tick) { -            if (is_bad_overlap) { -                const bool overlap_check = std::ranges::all_of( -                    image->overlapping_images, [&, image](const ImageId& overlap_id) { -                        auto& overlap = slot_images[overlap_id]; -                        return overlap.frame_tick >= image->frame_tick; -                    }); -                if (!overlap_check) { -                    ++deletion_iterator; -                    continue; -                } -            } -            if (!is_bad_overlap && must_download) { -                const bool alias_check = std::ranges::none_of( -                    image->aliased_images, [&, image](const AliasedImage& alias) { -                        auto& alias_image = slot_images[alias.id]; -                        return (alias_image.frame_tick < image->frame_tick) || -                               (alias_image.modification_tick < image->modification_tick); -                    }); - -                if (alias_check) { -                    auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); -                    const auto copies = FullDownloadCopies(image->info); -                    image->DownloadMemory(map, copies); -                    runtime.Finish(); -                    SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); -                } -            } -            if (True(image->flags & ImageFlagBits::Tracked)) { -                UntrackImage(*image, image_id); -            } -            UnregisterImage(image_id); -            DeleteImage(image_id); -            if (is_bad_overlap) { -                ++num_iterations; -            } +        --num_iterations; +        auto& image = slot_images[image_id]; +        const bool must_download = image.IsSafeDownload(); +        if (!high_priority_mode && must_download) { +            return false;          } -        ++deletion_iterator; -    } +        if (must_download) { +            auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); +            const auto copies = FullDownloadCopies(image.info); +            image.DownloadMemory(map, copies); +            runtime.Finish(); +            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); +        } +        if (True(image.flags & ImageFlagBits::Tracked)) { +            UntrackImage(image, image_id); +        } +        UnregisterImage(image_id); +        DeleteImage(image_id); +        return false; +    }; +    lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);  }  template <class P>  void TextureCache<P>::TickFrame() { -    if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { +    if (total_used_memory > minimum_memory) {          RunGarbageCollector();      }      sentenced_images.Tick(); @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {          tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);      }      total_used_memory += Common::AlignUp(tentative_size, 1024); +    image.lru_index = lru_cache.Insert(image_id, frame_tick); +      ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,                     [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });      if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {          tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);      }      total_used_memory -= Common::AlignUp(tentative_size, 1024); +    lru_cache.Free(image.lru_index);      const auto& clear_page_table =          [this, image_id](              u64 page, @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool      if (is_modification) {          MarkModification(image);      } -    image.frame_tick = frame_tick; +    lru_cache.Touch(image.lru_index, frame_tick);  }  template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@  #include "common/common_types.h"  #include "common/literals.h" +#include "common/lru_cache.h"  #include "video_core/compatible_formats.h"  #include "video_core/delayed_destruction_ring.h"  #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private:      std::vector<ImageId> uncommitted_downloads;      std::queue<std::vector<ImageId>> committed_downloads; +    struct LRUItemParams { +        using ObjectType = ImageId; +        using TickType = u64; +    }; +    Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; +      static constexpr size_t TICKS_TO_DESTROY = 6;      DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;      DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; @@ -379,7 +386,6 @@ private:      u64 modification_tick = 0;      u64 frame_tick = 0; -    typename SlotVector<Image>::Iterator deletion_iterator;  };  } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c010b9353..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32                  const u32 unswizzled_offset =                      slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; -                if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); -                    offset >= input.size()) { -                    // TODO(Rodrigo): This is an out of bounds access that should never happen. To -                    // avoid crashing the emulator, break. -                    ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size()); -                    break; -                } -                  u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];                  const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8e56a89e1..24821c1a3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -368,18 +368,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR      };      SetNext(next, demote); -    VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; -    if (is_float16_supported) { -        float16_int8 = { +    if (is_int8_supported || is_float16_supported) { +        VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{              .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR,              .pNext = nullptr, -            .shaderFloat16 = true, -            .shaderInt8 = false, +            .shaderFloat16 = is_float16_supported, +            .shaderInt8 = is_int8_supported,          };          SetNext(next, float16_int8); -    } else { +    } +    if (!is_float16_supported) {          LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");      } +    if (!is_int8_supported) { +        LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); +    }      if (!nv_viewport_swizzle) {          LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); @@ -836,6 +839,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {      bool has_khr_shader_float16_int8{};      bool has_khr_workgroup_memory_explicit_layout{};      bool has_khr_pipeline_executable_properties{}; +    bool has_khr_image_format_list{}; +    bool has_khr_swapchain_mutable_format{};      bool has_ext_subgroup_size_control{};      bool has_ext_transform_feedback{};      bool has_ext_custom_border_color{}; @@ -885,6 +890,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {          test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);          test(has_khr_workgroup_memory_explicit_layout,               VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); +        test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); +        test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, +             false);          test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false);          if (Settings::values.enable_nsight_aftermath) {              test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, @@ -909,6 +917,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {          physical.GetFeatures2KHR(features);          is_float16_supported = float16_int8_features.shaderFloat16; +        is_int8_supported = float16_int8_features.shaderInt8;          extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);      }      if (has_ext_subgroup_size_control) { @@ -1062,6 +1071,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {              khr_pipeline_executable_properties = true;          }      } +    if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { +        extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); +        extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); +        khr_swapchain_mutable_format = true; +    }      if (khr_push_descriptor) {          VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;          push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c19f40746..5599c38c5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -139,11 +139,16 @@ public:          return is_optimal_astc_supported;      } -    /// Returns true if the device supports float16 natively +    /// Returns true if the device supports float16 natively.      bool IsFloat16Supported() const {          return is_float16_supported;      } +    /// Returns true if the device supports int8 natively. +    bool IsInt8Supported() const { +        return is_int8_supported; +    } +      /// Returns true if the device warp size can potentially be bigger than guest's warp size.      bool IsWarpSizePotentiallyBiggerThanGuest() const {          return is_warp_potentially_bigger; @@ -219,6 +224,11 @@ public:          return khr_pipeline_executable_properties;      } +    /// Returns true if VK_KHR_swapchain_mutable_format is enabled. +    bool IsKhrSwapchainMutableFormatEnabled() const { +        return khr_swapchain_mutable_format; +    } +      /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.      bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {          return khr_workgroup_memory_explicit_layout; @@ -367,7 +377,8 @@ private:      u64 device_access_memory{};                 ///< Total size of device local memory in bytes.      u32 max_push_descriptors{};                 ///< Maximum number of push descriptors      bool is_optimal_astc_supported{};           ///< Support for native ASTC. -    bool is_float16_supported{};                ///< Support for float16 arithmetics. +    bool is_float16_supported{};                ///< Support for float16 arithmetic. +    bool is_int8_supported{};                   ///< Support for int8 arithmetic.      bool is_warp_potentially_bigger{};          ///< Host warp size can be bigger than guest.      bool is_formatless_image_load_supported{};  ///< Support for shader image read without format.      bool is_depth_bounds_supported{};           ///< Support for depth bounds. @@ -384,6 +395,7 @@ private:      bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.      bool khr_push_descriptor{};                  ///< Support for VK_KHR_push_descritor.      bool khr_pipeline_executable_properties{};   ///< Support for executable properties. +    bool khr_swapchain_mutable_format{};         ///< Support for VK_KHR_swapchain_mutable_format.      bool ext_index_type_uint8{};                 ///< Support for VK_EXT_index_type_uint8.      bool ext_sampler_filter_minmax{};            ///< Support for VK_EXT_sampler_filter_minmax.      bool ext_depth_range_unrestricted{};         ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index cf68a95b5..19ba0dbba 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -182,7 +182,14 @@ if (ENABLE_QT_TRANSLATION)      # Update source TS file if enabled      if (GENERATE_QT_TRANSLATION)          get_target_property(SRCS yuzu SOURCES) -        qt5_create_translation(QM_FILES ${SRCS} ${UIS} ${YUZU_QT_LANGUAGES}/en.ts) +        qt5_create_translation(QM_FILES +            ${SRCS} +            ${UIS} +            ${YUZU_QT_LANGUAGES}/en.ts +        OPTIONS +            -source-language en_US +            -target-language en_US +        )          add_custom_target(translation ALL DEPENDS ${YUZU_QT_LANGUAGES}/en.ts)      endif() diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 377795326..85d292bcc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -818,7 +818,6 @@ void Config::ReadRendererValues() {      ReadGlobalSetting(Settings::values.shader_backend);      ReadGlobalSetting(Settings::values.use_asynchronous_shaders);      ReadGlobalSetting(Settings::values.use_fast_gpu_time); -    ReadGlobalSetting(Settings::values.use_caches_gc);      ReadGlobalSetting(Settings::values.bg_red);      ReadGlobalSetting(Settings::values.bg_green);      ReadGlobalSetting(Settings::values.bg_blue); @@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() {                   Settings::values.shader_backend.UsingGlobal());      WriteGlobalSetting(Settings::values.use_asynchronous_shaders);      WriteGlobalSetting(Settings::values.use_fast_gpu_time); -    WriteGlobalSetting(Settings::values.use_caches_gc);      WriteGlobalSetting(Settings::values.bg_red);      WriteGlobalSetting(Settings::values.bg_green);      WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..43f1887d1 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -156,7 +156,7 @@          <item>           <widget class="QCheckBox" name="use_disk_shader_cache">            <property name="text"> -           <string>Use disk shader cache</string> +           <string>Use disk pipeline cache</string>            </property>           </widget>          </item> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a31b8e192..bfd464061 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {      ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());      ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); -    ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());      ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());      if (Settings::IsConfiguringGlobal()) { @@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {      ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,                                               ui->use_asynchronous_shaders,                                               use_asynchronous_shaders); -    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, -                                             use_caches_gc);      ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,                                               ui->use_fast_gpu_time, use_fast_gpu_time);  } @@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {          ui->use_asynchronous_shaders->setEnabled(              Settings::values.use_asynchronous_shaders.UsingGlobal());          ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); -        ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());          ui->anisotropic_filtering_combobox->setEnabled(              Settings::values.max_anisotropy.UsingGlobal()); @@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {                                              use_asynchronous_shaders);      ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,                                              Settings::values.use_fast_gpu_time, use_fast_gpu_time); -    ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, -                                            use_caches_gc);      ConfigurationShared::SetColoredComboBox(          ui->gpu_accuracy, ui->label_gpu_accuracy,          static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 7356e6916..13ba4ff6b 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -37,5 +37,4 @@ private:      ConfigurationShared::CheckState use_vsync;      ConfigurationShared::CheckState use_asynchronous_shaders;      ConfigurationShared::CheckState use_fast_gpu_time; -    ConfigurationShared::CheckState use_caches_gc;  }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4fe6b86ae..b91abc2f0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -82,7 +82,7 @@             <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>            </property>            <property name="text"> -           <string>Use asynchronous shader building (hack)</string> +           <string>Use asynchronous shader building (Hack)</string>            </property>           </widget>          </item> @@ -92,17 +92,7 @@              <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>            </property>            <property name="text"> -           <string>Use Fast GPU Time (hack)</string> -          </property> -         </widget> -        </item> -        <item> -         <widget class="QCheckBox" name="use_caches_gc"> -          <property name="toolTip"> -           <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> -          </property> -          <property name="text"> -           <string>Enable GPU cache garbage collection (experimental)</string> +           <string>Use Fast GPU Time (Hack)</string>            </property>           </widget>          </item> diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp index 22ca1285d..f89ea8ea7 100644 --- a/src/yuzu/debugger/console.cpp +++ b/src/yuzu/debugger/console.cpp @@ -21,6 +21,7 @@ void ToggleConsole() {          console_shown = UISettings::values.show_console.GetValue();      } +    using namespace Common::Log;  #if defined(_WIN32) && !defined(_DEBUG)      FILE* temp;      if (UISettings::values.show_console) { @@ -29,24 +30,20 @@ void ToggleConsole() {              freopen_s(&temp, "CONIN$", "r", stdin);              freopen_s(&temp, "CONOUT$", "w", stdout);              freopen_s(&temp, "CONOUT$", "w", stderr); -            Common::Log::AddBackend(std::make_unique<Common::Log::ColorConsoleBackend>()); +            SetColorConsoleBackendEnabled(true);          }      } else {          if (FreeConsole()) {              // In order to close the console, we have to also detach the streams on it.              // Just redirect them to NUL if there is no console window -            Common::Log::RemoveBackend(Common::Log::ColorConsoleBackend::Name()); +            SetColorConsoleBackendEnabled(false);              freopen_s(&temp, "NUL", "r", stdin);              freopen_s(&temp, "NUL", "w", stdout);              freopen_s(&temp, "NUL", "w", stderr);          }      }  #else -    if (UISettings::values.show_console) { -        Common::Log::AddBackend(std::make_unique<Common::Log::ColorConsoleBackend>()); -    } else { -        Common::Log::RemoveBackend(Common::Log::ColorConsoleBackend::Name()); -    } +    SetColorConsoleBackendEnabled(UISettings::values.show_console.GetValue());  #endif  }  } // namespace Debugger diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index e97804220..f9d949e75 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri      QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));      QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location"));      QAction* open_transferable_shader_cache = -        context_menu.addAction(tr("Open Transferable Shader Cache")); +        context_menu.addAction(tr("Open Transferable Pipeline Cache"));      context_menu.addSeparator();      QMenu* remove_menu = context_menu.addMenu(tr("Remove"));      QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));      QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));      QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); -    QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); -    QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); +    QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache")); +    QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));      remove_menu->addSeparator(); -    QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); +    QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches"));      QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));      QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));      QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 5940e0cfd..e36774cc6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -175,21 +175,6 @@ void GMainWindow::ShowTelemetryCallout() {  const int GMainWindow::max_recent_files_item; -static void InitializeLogging() { -    using namespace Common; - -    Log::Filter log_filter; -    log_filter.ParseFilterString(Settings::values.log_filter.GetValue()); -    Log::SetGlobalFilter(log_filter); - -    const auto log_dir = FS::GetYuzuPath(FS::YuzuPath::LogDir); -    void(FS::CreateDir(log_dir)); -    Log::AddBackend(std::make_unique<Log::FileBackend>(log_dir / LOG_FILE)); -#ifdef _WIN32 -    Log::AddBackend(std::make_unique<Log::DebuggerBackend>()); -#endif -} -  static void RemoveCachedContents() {      const auto cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir);      const auto offline_fonts = cache_dir / "fonts"; @@ -207,8 +192,7 @@ GMainWindow::GMainWindow()      : input_subsystem{std::make_shared<InputCommon::InputSubsystem>()},        config{std::make_unique<Config>()}, vfs{std::make_shared<FileSys::RealVfsFilesystem>()},        provider{std::make_unique<FileSys::ManualContentProvider>()} { -    InitializeLogging(); - +    Common::Log::Initialize();      LoadTranslation();      setAcceptDrops(true); @@ -3437,6 +3421,7 @@ int main(int argc, char* argv[]) {      // generating shaders      setlocale(LC_ALL, "C"); +    Core::System::InitializeGlobalInstance();      GMainWindow main_window;      // After settings have been loaded by GMainWindow, apply the filter      main_window.show(); diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524..757dd1ea0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -468,7 +468,6 @@ void Config::ReadValues() {      ReadSetting("Renderer", Settings::values.use_nvdec_emulation);      ReadSetting("Renderer", Settings::values.accelerate_astc);      ReadSetting("Renderer", Settings::values.use_fast_gpu_time); -    ReadSetting("Renderer", Settings::values.use_caches_gc);      ReadSetting("Renderer", Settings::values.bg_red);      ReadSetting("Renderer", Settings::values.bg_green); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index c10093820..ba2c993ba 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -74,31 +74,14 @@ static void PrintVersion() {      std::cout << "yuzu " << Common::g_scm_branch << " " << Common::g_scm_desc << std::endl;  } -static void InitializeLogging() { -    using namespace Common; - -    Log::Filter log_filter(Log::Level::Debug); -    log_filter.ParseFilterString(static_cast<std::string>(Settings::values.log_filter)); -    Log::SetGlobalFilter(log_filter); - -    Log::AddBackend(std::make_unique<Log::ColorConsoleBackend>()); - -    const auto& log_dir = FS::GetYuzuPath(FS::YuzuPath::LogDir); -    void(FS::CreateDir(log_dir)); -    Log::AddBackend(std::make_unique<Log::FileBackend>(log_dir / LOG_FILE)); -#ifdef _WIN32 -    Log::AddBackend(std::make_unique<Log::DebuggerBackend>()); -#endif -} -  /// Application entry point  int main(int argc, char** argv) { +    Common::Log::Initialize(); +    Common::Log::SetColorConsoleBackendEnabled(true);      Common::DetachedTasks detached_tasks;      Config config;      int option_index = 0; - -    InitializeLogging();  #ifdef _WIN32      int argc_w;      auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); @@ -163,6 +146,7 @@ int main(int argc, char** argv) {          return -1;      } +    Core::System::InitializeGlobalInstance();      auto& system{Core::System::GetInstance()};      InputCommon::InputSubsystem input_subsystem; | 
