diff options
38 files changed, 430 insertions, 269 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt index 30a3c939e..3d03bbf94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -255,7 +255,7 @@ endif()  # boost asio's concept usage doesn't play nicely with some compilers yet.  add_definitions(-DBOOST_ASIO_DISABLE_CONCEPTS)  if (MSVC) -    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/std:c++latest>) +    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/std:c++20>)      # boost still makes use of deprecated result_of.      add_definitions(-D_HAS_DEPRECATED_RESULT_OF) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 55b113297..0696201df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,7 +43,7 @@ if (MSVC)          /Zo          /permissive-          /EHsc -        /std:c++latest +        /std:c++20          /utf-8          /volatile:iso          /Zc:externConstexpr @@ -51,8 +51,10 @@ if (MSVC)          /Zc:throwingNew          /GT +        # Modules +        /experimental:module- # Disable module support explicitly due to conflicts with precompiled headers +          # External headers diagnostics -        /experimental:external  # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later          /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers          /external:W0            # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers diff --git a/src/android/app/src/main/AndroidManifest.xml b/src/android/app/src/main/AndroidManifest.xml index 43087f2c0..eef566042 100644 --- a/src/android/app/src/main/AndroidManifest.xml +++ b/src/android/app/src/main/AndroidManifest.xml @@ -6,17 +6,10 @@ SPDX-License-Identifier: GPL-3.0-or-later  -->  <manifest xmlns:android="http://schemas.android.com/apk/res/android"> -    <uses-feature -        android:name="android.hardware.touchscreen" -        android:required="false"/> -    <uses-feature -        android:name="android.hardware.gamepad" -        android:required="false"/> - -    <uses-feature -        android:name="android.hardware.vulkan.version" -        android:version="0x401000" -        android:required="true" /> +    <uses-feature android:name="android.hardware.touchscreen" android:required="false" /> +    <uses-feature android:name="android.hardware.gamepad" android:required="false" /> +    <uses-feature android:name="android.software.leanback" android:required="false" /> +    <uses-feature android:name="android.hardware.vulkan.version" android:version="0x401000" android:required="true" />      <uses-permission android:name="android.permission.INTERNET" />      <uses-permission android:name="android.permission.FOREGROUND_SERVICE" /> @@ -31,7 +24,7 @@ SPDX-License-Identifier: GPL-3.0-or-later          android:hasFragileUserData="true"          android:supportsRtl="true"          android:isGame="true" -        android:banner="@drawable/ic_launcher" +        android:banner="@drawable/tv_banner"          android:extractNativeLibs="true"          android:fullBackupContent="@xml/data_extraction_rules"          android:dataExtractionRules="@xml/data_extraction_rules_api_31" @@ -44,9 +37,10 @@ SPDX-License-Identifier: GPL-3.0-or-later              <!-- This intentfilter marks this Activity as the one that gets launched from Home screen. -->              <intent-filter> -                <action android:name="android.intent.action.MAIN"/> +                <action android:name="android.intent.action.MAIN" /> -                <category android:name="android.intent.category.LAUNCHER"/> +                <category android:name="android.intent.category.LAUNCHER" /> +                <category android:name="android.intent.category.LEANBACK_LAUNCHER" />              </intent-filter>          </activity> diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index 94d5156cf..f4db61cb3 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -263,7 +263,8 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener {          val config: Configuration = resources.configuration          if ((config.screenLayout and Configuration.SCREENLAYOUT_LONG_YES) != 0 || -            (config.screenLayout and Configuration.SCREENLAYOUT_LONG_NO) == 0) { +            (config.screenLayout and Configuration.SCREENLAYOUT_LONG_NO) == 0 || +            (config.screenLayout and Configuration.SCREENLAYOUT_SIZE_SMALL) != 0) {              return rotation          }          when (rotation) { diff --git a/src/android/app/src/main/res/drawable-xhdpi/tv_banner.png b/src/android/app/src/main/res/drawable-xhdpi/tv_banner.pngBinary files differ new file mode 100644 index 000000000..20c770591 --- /dev/null +++ b/src/android/app/src/main/res/drawable-xhdpi/tv_banner.png diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp index 07a679c32..703ef4494 100644 --- a/src/audio_core/audio_core.cpp +++ b/src/audio_core/audio_core.cpp @@ -47,12 +47,4 @@ AudioRenderer::ADSP::ADSP& AudioCore::GetADSP() {      return *adsp;  } -void AudioCore::SetNVDECActive(bool active) { -    nvdec_active = active; -} - -bool AudioCore::IsNVDECActive() const { -    return nvdec_active; -} -  } // namespace AudioCore diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h index e33e00a3e..ea047773e 100644 --- a/src/audio_core/audio_core.h +++ b/src/audio_core/audio_core.h @@ -57,18 +57,6 @@ public:       */      AudioRenderer::ADSP::ADSP& GetADSP(); -    /** -     * Toggle NVDEC state, used to avoid stall in playback. -     * -     * @param active - Set true if nvdec is active, otherwise false. -     */ -    void SetNVDECActive(bool active); - -    /** -     * Get NVDEC state. -     */ -    bool IsNVDECActive() const; -  private:      /**       * Create the sinks on startup. @@ -83,8 +71,6 @@ private:      std::unique_ptr<Sink::Sink> input_sink;      /// The ADSP in the sysmodule      std::unique_ptr<AudioRenderer::ADSP::ADSP> adsp; -    /// Is NVDec currently active? -    bool nvdec_active{false};  };  } // namespace AudioCore diff --git a/src/common/settings.cpp b/src/common/settings.cpp index ff53e80bb..9ff3edabb 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -235,6 +235,7 @@ void RestoreGlobalState(bool is_powered_on) {      values.bg_green.SetGlobal(true);      values.bg_blue.SetGlobal(true);      values.enable_compute_pipelines.SetGlobal(true); +    values.use_video_framerate.SetGlobal(true);      // System      values.language_index.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 7f865b2a7..9682281b0 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -482,6 +482,7 @@ struct Values {      SwitchableSetting<AstcRecompression, true> astc_recompression{          AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,          "astc_recompression"}; +    SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"};      SwitchableSetting<u8> bg_red{0, "bg_red"};      SwitchableSetting<u8> bg_green{0, "bg_green"}; diff --git a/src/common/uuid.cpp b/src/common/uuid.cpp index 89e1ed225..035df7fe0 100644 --- a/src/common/uuid.cpp +++ b/src/common/uuid.cpp @@ -48,7 +48,7 @@ std::array<u8, 0x10> ConstructFromRawString(std::string_view raw_string) {  }  std::array<u8, 0x10> ConstructFromFormattedString(std::string_view formatted_string) { -    std::array<u8, 0x10> uuid; +    std::array<u8, 0x10> uuid{};      size_t i = 0; diff --git a/src/core/core.cpp b/src/core/core.cpp index 4406ae30e..7ba704f18 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -216,6 +216,14 @@ struct System::Impl {          }      } +    void SetNVDECActive(bool is_nvdec_active) { +        nvdec_active = is_nvdec_active; +    } + +    bool GetNVDECActive() { +        return nvdec_active; +    } +      void InitializeDebugger(System& system, u16 port) {          debugger = std::make_unique<Debugger>(system, port);      } @@ -485,6 +493,8 @@ struct System::Impl {      std::atomic_bool is_powered_on{};      bool exit_lock = false; +    bool nvdec_active{}; +      Reporter reporter;      std::unique_ptr<Memory::CheatEngine> cheat_engine;      std::unique_ptr<Tools::Freezer> memory_freezer; @@ -594,6 +604,14 @@ void System::UnstallApplication() {      impl->UnstallApplication();  } +void System::SetNVDECActive(bool is_nvdec_active) { +    impl->SetNVDECActive(is_nvdec_active); +} + +bool System::GetNVDECActive() { +    return impl->GetNVDECActive(); +} +  void System::InitializeDebugger() {      impl->InitializeDebugger(*this, Settings::values.gdbstub_port.GetValue());  } diff --git a/src/core/core.h b/src/core/core.h index 4f153154f..ff2e4bd30 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -189,6 +189,9 @@ public:      std::unique_lock<std::mutex> StallApplication();      void UnstallApplication(); +    void SetNVDECActive(bool is_nvdec_active); +    [[nodiscard]] bool GetNVDECActive(); +      /**       * Initialize the debugger.       */ diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp index 50f44f598..cd9ac2e75 100644 --- a/src/core/file_sys/control_metadata.cpp +++ b/src/core/file_sys/control_metadata.cpp @@ -23,8 +23,8 @@ const std::array<const char*, 16> LANGUAGE_NAMES{{      "Portuguese",      "Russian",      "Korean", -    "Taiwanese", -    "Chinese", +    "TraditionalChinese", +    "SimplifiedChinese",      "BrazilianPortuguese",  }}; @@ -45,17 +45,17 @@ constexpr std::array<Language, 18> language_to_codes = {{      Language::German,      Language::Italian,      Language::Spanish, -    Language::Chinese, +    Language::SimplifiedChinese,      Language::Korean,      Language::Dutch,      Language::Portuguese,      Language::Russian, -    Language::Taiwanese, +    Language::TraditionalChinese,      Language::BritishEnglish,      Language::CanadianFrench,      Language::LatinAmericanSpanish, -    Language::Chinese, -    Language::Taiwanese, +    Language::SimplifiedChinese, +    Language::TraditionalChinese,      Language::BrazilianPortuguese,  }}; diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h index 6a81873b1..c98efb00d 100644 --- a/src/core/file_sys/control_metadata.h +++ b/src/core/file_sys/control_metadata.h @@ -84,8 +84,8 @@ enum class Language : u8 {      Portuguese = 10,      Russian = 11,      Korean = 12, -    Taiwanese = 13, -    Chinese = 14, +    TraditionalChinese = 13, +    SimplifiedChinese = 14,      BrazilianPortuguese = 15,      Default = 255, diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index f786f2add..4e61d4335 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -25,6 +25,8 @@  #include "core/file_sys/vfs_layered.h"  #include "core/file_sys/vfs_vector.h"  #include "core/hle/service/filesystem/filesystem.h" +#include "core/hle/service/ns/language.h" +#include "core/hle/service/set/set.h"  #include "core/loader/loader.h"  #include "core/loader/nso.h"  #include "core/memory/cheat_engine.h" @@ -624,8 +626,37 @@ PatchManager::Metadata PatchManager::ParseControlNCA(const NCA& nca) const {      auto nacp = nacp_file == nullptr ? nullptr : std::make_unique<NACP>(nacp_file); +    // Get language code from settings +    const auto language_code = +        Service::Set::GetLanguageCodeFromIndex(Settings::values.language_index.GetValue()); + +    // Convert to application language and get priority list +    const auto application_language = +        Service::NS::ConvertToApplicationLanguage(language_code) +            .value_or(Service::NS::ApplicationLanguage::AmericanEnglish); +    const auto language_priority_list = +        Service::NS::GetApplicationLanguagePriorityList(application_language); + +    // Convert to language names +    auto priority_language_names = FileSys::LANGUAGE_NAMES; // Copy +    if (language_priority_list) { +        for (size_t i = 0; i < priority_language_names.size(); ++i) { +            // Relies on FileSys::LANGUAGE_NAMES being in the same order as +            // Service::NS::ApplicationLanguage +            const auto language_index = static_cast<u8>(language_priority_list->at(i)); + +            if (language_index < FileSys::LANGUAGE_NAMES.size()) { +                priority_language_names[i] = FileSys::LANGUAGE_NAMES[language_index]; +            } else { +                // Not a catastrophe, unlikely to happen +                LOG_WARNING(Loader, "Invalid language index {}", language_index); +            } +        } +    } + +    // Get first matching icon      VirtualFile icon_file; -    for (const auto& language : FileSys::LANGUAGE_NAMES) { +    for (const auto& language : priority_language_names) {          icon_file = extracted->GetFile(std::string("icon_").append(language).append(".dat"));          if (icon_file != nullptr) {              break; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 0c7aee1b8..dc45169ad 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -69,7 +69,7 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in  void nvhost_nvdec::OnOpen(DeviceFD fd) {      LOG_INFO(Service_NVDRV, "NVDEC video stream started"); -    system.AudioCore().SetNVDECActive(true); +    system.SetNVDECActive(true);  }  void nvhost_nvdec::OnClose(DeviceFD fd) { @@ -79,7 +79,7 @@ void nvhost_nvdec::OnClose(DeviceFD fd) {      if (iter != host1x_file.fd_to_id.end()) {          system.GPU().ClearCdmaInstance(iter->second);      } -    system.AudioCore().SetNVDECActive(false); +    system.SetNVDECActive(false);  }  } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 4988e6e17..da2d5890f 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -324,6 +324,10 @@ s64 Nvnflinger::GetNextTicks() const {              speed_scale = 0.01f;          }      } +    if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) { +        // Run at intended presentation rate during video playback. +        speed_scale = 1.f; +    }      // As an extension, treat nonpositive swap interval as framerate multiplier.      const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 94e3000ba..bf6439530 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -133,8 +133,8 @@ add_library(video_core STATIC      renderer_opengl/gl_shader_util.h      renderer_opengl/gl_state_tracker.cpp      renderer_opengl/gl_state_tracker.h -    renderer_opengl/gl_stream_buffer.cpp -    renderer_opengl/gl_stream_buffer.h +    renderer_opengl/gl_staging_buffer_pool.cpp +    renderer_opengl/gl_staging_buffer_pool.h      renderer_opengl/gl_texture_cache.cpp      renderer_opengl/gl_texture_cache.h      renderer_opengl/gl_texture_cache_base.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f1ad5f7cb..2f281b370 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -478,7 +478,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {      if (committed_ranges.empty()) {          if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { -              async_buffers.emplace_back(std::optional<Async_Buffer>{});          }          return; @@ -539,7 +538,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {      committed_ranges.clear();      if (downloads.empty()) {          if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { -              async_buffers.emplace_back(std::optional<Async_Buffer>{});          }          return; @@ -691,7 +689,7 @@ void BufferCache<P>::BindHostIndexBuffer() {      const u32 size = channel_state->index_buffer.size;      const auto& draw_state = maxwell3d->draw_manager->GetDrawState();      if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { -        if constexpr (USE_MEMORY_MAPS) { +        if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {              auto upload_staging = runtime.UploadStagingBuffer(size);              std::array<BufferCopy, 1> copies{                  {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; @@ -1462,7 +1460,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,  template <class P>  void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,                                    std::span<BufferCopy> copies) { -    if constexpr (USE_MEMORY_MAPS) { +    if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {          MappedUploadMemory(buffer, total_size_bytes, copies);      } else {          ImmediateUploadMemory(buffer, largest_copy, copies); @@ -1473,7 +1471,7 @@ template <class P>  void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,                                             [[maybe_unused]] u64 largest_copy,                                             [[maybe_unused]] std::span<const BufferCopy> copies) { -    if constexpr (!USE_MEMORY_MAPS) { +    if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {          std::span<u8> immediate_buffer;          for (const BufferCopy& copy : copies) {              std::span<const u8> upload_span; @@ -1532,7 +1530,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,      auto& buffer = slot_buffers[buffer_id];      SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); -    if constexpr (USE_MEMORY_MAPS) { +    if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {          auto upload_staging = runtime.UploadStagingBuffer(copy_size);          std::array copies{BufferCopy{              .src_offset = upload_staging.offset, diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index c689fe06b..60a1f285e 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -173,6 +173,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf      static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;      static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;      static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; +    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;      static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;      static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6d3bda192..c419714d4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {      return views.back().texture.handle;  } -BufferCacheRuntime::BufferCacheRuntime(const Device& device_) -    : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, +BufferCacheRuntime::BufferCacheRuntime(const Device& device_, +                                       StagingBufferPool& staging_buffer_pool_) +    : device{device_}, staging_buffer_pool{staging_buffer_pool_}, +      has_fast_buffer_sub_data{device.HasFastBufferSubData()},        use_assembly_shaders{device.UseAssemblyShaders()},        has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},        stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { @@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)      }();  } +StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) { +    return staging_buffer_pool.RequestUploadBuffer(size); +} + +StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) { +    return staging_buffer_pool.RequestDownloadBuffer(size); +} +  u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {      if (device.CanReportMemoryUsage()) {          return device_access_memory - device.GetCurrentDedicatedVideoMemory(); @@ -147,15 +157,49 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {      return 2_GiB;  } -void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, -                                    std::span<const VideoCommon::BufferCopy> copies) { +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer, +                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) { +    if (barrier) { +        PreCopyBarrier(); +    }      for (const VideoCommon::BufferCopy& copy : copies) { -        glCopyNamedBufferSubData( -            src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), -            static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); +        glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset), +                                 static_cast<GLintptr>(copy.dst_offset), +                                 static_cast<GLsizeiptr>(copy.size)); +    } +    if (barrier) { +        PostCopyBarrier();      }  } +void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, +                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) { +    CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, +                                    std::span<const VideoCommon::BufferCopy> copies, bool barrier) { +    CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier); +} + +void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, +                                    std::span<const VideoCommon::BufferCopy> copies) { +    CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies); +} + +void BufferCacheRuntime::PreCopyBarrier() { +    // TODO: finer grained barrier? +    glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +void BufferCacheRuntime::PostCopyBarrier() { +    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); +} + +void BufferCacheRuntime::Finish() { +    glFinish(); +} +  void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {      glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),                                static_cast<GLsizeiptr>(size), GL_RED, GL_UNSIGNED_INT, &value); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 18d3c3ac0..a24991585 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -12,7 +12,7 @@  #include "video_core/rasterizer_interface.h"  #include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"  namespace OpenGL { @@ -60,11 +60,28 @@ class BufferCacheRuntime {  public:      static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); -    explicit BufferCacheRuntime(const Device& device_); +    explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_); + +    [[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size); + +    [[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size); + +    void CopyBuffer(GLuint dst_buffer, GLuint src_buffer, +                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + +    void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer, +                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true); + +    void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer, +                    std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);      void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,                      std::span<const VideoCommon::BufferCopy> copies); +    void PreCopyBarrier(); +    void PostCopyBarrier(); +    void Finish(); +      void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);      void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); @@ -169,6 +186,7 @@ private:      };      const Device& device; +    StagingBufferPool& staging_buffer_pool;      bool has_fast_buffer_sub_data = false;      bool use_assembly_shaders = false; @@ -201,7 +219,7 @@ private:  struct BufferCacheParams {      using Runtime = OpenGL::BufferCacheRuntime;      using Buffer = OpenGL::Buffer; -    using Async_Buffer = u32; +    using Async_Buffer = OpenGL::StagingBufferMap;      using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;      static constexpr bool IS_OPENGL = true; @@ -209,9 +227,12 @@ struct BufferCacheParams {      static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;      static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;      static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; -    static constexpr bool USE_MEMORY_MAPS = false; +    static constexpr bool USE_MEMORY_MAPS = true;      static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;      static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; + +    // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads +    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;  };  using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f5baa0f3c..fc711c44a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -24,6 +24,7 @@  #include "video_core/renderer_opengl/gl_query_cache.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"  #include "video_core/renderer_opengl/gl_texture_cache.h"  #include "video_core/renderer_opengl/maxwell_to_gl.h"  #include "video_core/renderer_opengl/renderer_opengl.h" @@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra                                     StateTracker& state_tracker_)      : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),        program_manager(program_manager_), state_tracker(state_tracker_), -      texture_cache_runtime(device, program_manager, state_tracker), -      texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), +      texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), +      texture_cache(texture_cache_runtime, *this), +      buffer_cache_runtime(device, staging_buffer_pool),        buffer_cache(*this, cpu_memory_, buffer_cache_runtime),        shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,                     state_tracker, gpu.ShaderNotify()), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 410d8ffc5..a73ad15c1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -230,6 +230,7 @@ private:      ProgramManager& program_manager;      StateTracker& state_tracker; +    StagingBufferPool staging_buffer_pool;      TextureCacheRuntime texture_cache_runtime;      TextureCache texture_cache;      BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp new file mode 100644 index 000000000..bbb06e51f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.cpp @@ -0,0 +1,150 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <array> +#include <memory> +#include <span> + +#include <glad/glad.h> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/bit_util.h" +#include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h" + +MICROPROFILE_DEFINE(OpenGL_BufferRequest, "OpenGL", "BufferRequest", MP_RGB(128, 128, 192)); + +namespace OpenGL { + +StagingBufferMap::~StagingBufferMap() { +    if (sync) { +        sync->Create(); +    } +} + +StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) +    : storage_flags{storage_flags_}, map_flags{map_flags_} {} + +StagingBuffers::~StagingBuffers() = default; + +StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) { +    MICROPROFILE_SCOPE(OpenGL_BufferRequest); + +    const size_t index = RequestBuffer(requested_size); +    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; +    sync_indices[index] = insert_fence ? ++current_sync_index : 0; +    return StagingBufferMap{ +        .mapped_span = std::span(maps[index], requested_size), +        .sync = sync, +        .buffer = buffers[index].handle, +    }; +} + +size_t StagingBuffers::RequestBuffer(size_t requested_size) { +    if (const std::optional<size_t> index = FindBuffer(requested_size); index) { +        return *index; +    } + +    OGLBuffer& buffer = buffers.emplace_back(); +    buffer.Create(); +    const auto next_pow2_size = Common::NextPow2(requested_size); +    glNamedBufferStorage(buffer.handle, next_pow2_size, nullptr, +                         storage_flags | GL_MAP_PERSISTENT_BIT); +    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, next_pow2_size, +                                                          map_flags | GL_MAP_PERSISTENT_BIT))); +    syncs.emplace_back(); +    sync_indices.emplace_back(); +    sizes.push_back(next_pow2_size); + +    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && +           maps.size() == sizes.size()); + +    return buffers.size() - 1; +} + +std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) { +    size_t known_unsignaled_index = current_sync_index + 1; +    size_t smallest_buffer = std::numeric_limits<size_t>::max(); +    std::optional<size_t> found; +    const size_t num_buffers = sizes.size(); +    for (size_t index = 0; index < num_buffers; ++index) { +        const size_t buffer_size = sizes[index]; +        if (buffer_size < requested_size || buffer_size >= smallest_buffer) { +            continue; +        } +        if (syncs[index].handle != 0) { +            if (sync_indices[index] >= known_unsignaled_index) { +                // This fence is later than a fence that is known to not be signaled +                continue; +            } +            if (!syncs[index].IsSignaled()) { +                // Since this fence hasn't been signaled, it's safe to assume all later +                // fences haven't been signaled either +                known_unsignaled_index = std::min(known_unsignaled_index, sync_indices[index]); +                continue; +            } +            syncs[index].Release(); +        } +        smallest_buffer = buffer_size; +        found = index; +    } +    return found; +} + +StreamBuffer::StreamBuffer() { +    static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; +    buffer.Create(); +    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); +    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); +    mapped_pointer = +        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); +    for (OGLSync& sync : fences) { +        sync.Create(); +    } +} + +std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { +    ASSERT(size < REGION_SIZE); +    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; +         ++region) { +        fences[region].Create(); +    } +    used_iterator = iterator; + +    for (size_t region = Region(free_iterator) + 1, +                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); +         region < region_end; ++region) { +        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); +        fences[region].Release(); +    } +    if (iterator + size >= free_iterator) { +        free_iterator = iterator + size; +    } +    if (iterator + size > STREAM_BUFFER_SIZE) { +        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { +            fences[region].Create(); +        } +        used_iterator = 0; +        iterator = 0; +        free_iterator = size; + +        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { +            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); +            fences[region].Release(); +        } +    } +    const size_t offset = iterator; +    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); +    return {std::span(mapped_pointer + offset, size), offset}; +} + +StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) { +    return upload_buffers.RequestMap(size, true); +} + +StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) { +    return download_buffers.RequestMap(size, false); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h index 8fe927aaf..60f72d3a0 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_staging_buffer_pool.h @@ -4,8 +4,10 @@  #pragma once  #include <array> +#include <optional>  #include <span>  #include <utility> +#include <vector>  #include <glad/glad.h> @@ -17,6 +19,35 @@ namespace OpenGL {  using namespace Common::Literals; +struct StagingBufferMap { +    ~StagingBufferMap(); + +    std::span<u8> mapped_span; +    size_t offset = 0; +    OGLSync* sync; +    GLuint buffer; +}; + +struct StagingBuffers { +    explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); +    ~StagingBuffers(); + +    StagingBufferMap RequestMap(size_t requested_size, bool insert_fence); + +    size_t RequestBuffer(size_t requested_size); + +    std::optional<size_t> FindBuffer(size_t requested_size); + +    std::vector<OGLSync> syncs; +    std::vector<OGLBuffer> buffers; +    std::vector<u8*> maps; +    std::vector<size_t> sizes; +    std::vector<size_t> sync_indices; +    GLenum storage_flags; +    GLenum map_flags; +    size_t current_sync_index = 0; +}; +  class StreamBuffer {      static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;      static constexpr size_t NUM_SYNCS = 16; @@ -48,4 +79,17 @@ private:      std::array<OGLSync, NUM_SYNCS> fences;  }; +class StagingBufferPool { +public: +    StagingBufferPool() = default; +    ~StagingBufferPool() = default; + +    StagingBufferMap RequestUploadBuffer(size_t size); +    StagingBufferMap RequestDownloadBuffer(size_t size); + +private: +    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; +    StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; +}; +  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp deleted file mode 100644 index 2005c8993..000000000 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include <array> -#include <memory> -#include <span> - -#include <glad/glad.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" - -namespace OpenGL { - -StreamBuffer::StreamBuffer() { -    static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; -    buffer.Create(); -    glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); -    glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); -    mapped_pointer = -        static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); -    for (OGLSync& sync : fences) { -        sync.Create(); -    } -} - -std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { -    ASSERT(size < REGION_SIZE); -    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; -         ++region) { -        fences[region].Create(); -    } -    used_iterator = iterator; - -    for (size_t region = Region(free_iterator) + 1, -                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); -         region < region_end; ++region) { -        glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); -        fences[region].Release(); -    } -    if (iterator + size >= free_iterator) { -        free_iterator = iterator + size; -    } -    if (iterator + size > STREAM_BUFFER_SIZE) { -        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { -            fences[region].Create(); -        } -        used_iterator = 0; -        iterator = 0; -        free_iterator = size; - -        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { -            glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); -            fences[region].Release(); -        } -    } -    const size_t offset = iterator; -    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); -    return {std::span(mapped_pointer + offset, size), offset}; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 56d0ff869..1c5dbcdd8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -456,19 +456,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form          return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;      }  } -  } // Anonymous namespace -ImageBufferMap::~ImageBufferMap() { -    if (sync) { -        sync->Create(); -    } -} -  TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, -                                         StateTracker& state_tracker_) -    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), -      format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { +                                         StateTracker& state_tracker_, +                                         StagingBufferPool& staging_buffer_pool_) +    : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_}, +      util_shaders(program_manager), format_conversion_pass{util_shaders}, +      resolution{Settings::values.resolution_info} {      static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};      for (size_t i = 0; i < TARGETS.size(); ++i) {          const GLenum target = TARGETS[i]; @@ -558,12 +553,12 @@ void TextureCacheRuntime::Finish() {      glFinish();  } -ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { -    return upload_buffers.RequestMap(size, true); +StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { +    return staging_buffer_pool.RequestUploadBuffer(size);  } -ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { -    return download_buffers.RequestMap(size, false); +StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { +    return staging_buffer_pool.RequestDownloadBuffer(size);  }  u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { @@ -648,7 +643,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,                             is_linear ? GL_LINEAR : GL_NEAREST);  } -void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, +void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,                                                  std::span<const SwizzleParameters> swizzles) {      switch (image.info.type) {      case ImageType::e2D: @@ -690,64 +685,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {      return device.HasASTC();  } -TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) -    : storage_flags{storage_flags_}, map_flags{map_flags_} {} - -TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default; - -ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size, -                                                               bool insert_fence) { -    const size_t index = RequestBuffer(requested_size); -    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; -    return ImageBufferMap{ -        .mapped_span = std::span(maps[index], requested_size), -        .sync = sync, -        .buffer = buffers[index].handle, -    }; -} - -size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { -    if (const std::optional<size_t> index = FindBuffer(requested_size); index) { -        return *index; -    } - -    OGLBuffer& buffer = buffers.emplace_back(); -    buffer.Create(); -    glNamedBufferStorage(buffer.handle, requested_size, nullptr, -                         storage_flags | GL_MAP_PERSISTENT_BIT); -    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size, -                                                          map_flags | GL_MAP_PERSISTENT_BIT))); - -    syncs.emplace_back(); -    sizes.push_back(requested_size); - -    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() && -           maps.size() == sizes.size()); - -    return buffers.size() - 1; -} - -std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) { -    size_t smallest_buffer = std::numeric_limits<size_t>::max(); -    std::optional<size_t> found; -    const size_t num_buffers = sizes.size(); -    for (size_t index = 0; index < num_buffers; ++index) { -        const size_t buffer_size = sizes[index]; -        if (buffer_size < requested_size || buffer_size >= smallest_buffer) { -            continue; -        } -        if (syncs[index].handle != 0) { -            if (!syncs[index].IsSignaled()) { -                continue; -            } -            syncs[index].Release(); -        } -        smallest_buffer = buffer_size; -        found = index; -    } -    return found; -} -  Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,               VAddr cpu_addr_)      : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { @@ -823,7 +760,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,      }  } -void Image::UploadMemory(const ImageBufferMap& map, +void Image::UploadMemory(const StagingBufferMap& map,                           std::span<const VideoCommon::BufferImageCopy> copies) {      UploadMemory(map.buffer, map.offset, copies);  } @@ -870,7 +807,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b      }  } -void Image::DownloadMemory(ImageBufferMap& map, +void Image::DownloadMemory(StagingBufferMap& map,                             std::span<const VideoCommon::BufferImageCopy> copies) {      DownloadMemory(map.buffer, map.offset, copies);  } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e9b3302b..1148b73d7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -11,6 +11,7 @@  #include "shader_recompiler/shader_info.h"  #include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"  #include "video_core/renderer_opengl/util_shaders.h"  #include "video_core/texture_cache/image_view_base.h"  #include "video_core/texture_cache/texture_cache_base.h" @@ -37,15 +38,6 @@ using VideoCommon::Region2D;  using VideoCommon::RenderTargets;  using VideoCommon::SlotVector; -struct ImageBufferMap { -    ~ImageBufferMap(); - -    std::span<u8> mapped_span; -    size_t offset = 0; -    OGLSync* sync; -    GLuint buffer; -}; -  struct FormatProperties {      GLenum compatibility_class;      bool compatibility_by_size; @@ -74,14 +66,15 @@ class TextureCacheRuntime {  public:      explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, -                                 StateTracker& state_tracker); +                                 StateTracker& state_tracker, +                                 StagingBufferPool& staging_buffer_pool);      ~TextureCacheRuntime();      void Finish(); -    ImageBufferMap UploadStagingBuffer(size_t size); +    StagingBufferMap UploadStagingBuffer(size_t size); -    ImageBufferMap DownloadStagingBuffer(size_t size); +    StagingBufferMap DownloadStagingBuffer(size_t size);      u64 GetDeviceLocalMemory() const {          return device_access_memory; @@ -120,7 +113,7 @@ public:                           const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,                           Tegra::Engines::Fermi2D::Operation operation); -    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, +    void AccelerateImageUpload(Image& image, const StagingBufferMap& map,                                 std::span<const VideoCommon::SwizzleParameters> swizzles);      void InsertUploadMemoryBarrier(); @@ -149,35 +142,16 @@ public:      }  private: -    struct StagingBuffers { -        explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); -        ~StagingBuffers(); - -        ImageBufferMap RequestMap(size_t requested_size, bool insert_fence); - -        size_t RequestBuffer(size_t requested_size); - -        std::optional<size_t> FindBuffer(size_t requested_size); - -        std::vector<OGLSync> syncs; -        std::vector<OGLBuffer> buffers; -        std::vector<u8*> maps; -        std::vector<size_t> sizes; -        GLenum storage_flags; -        GLenum map_flags; -    }; -      const Device& device;      StateTracker& state_tracker; +    StagingBufferPool& staging_buffer_pool; +      UtilShaders util_shaders;      FormatConversionPass format_conversion_pass;      std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;      bool has_broken_texture_view_formats = false; -    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT}; -    StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT}; -      OGLTexture null_image_1d_array;      OGLTexture null_image_cube_array;      OGLTexture null_image_3d; @@ -213,7 +187,7 @@ public:      void UploadMemory(GLuint buffer_handle, size_t buffer_offset,                        std::span<const VideoCommon::BufferImageCopy> copies); -    void UploadMemory(const ImageBufferMap& map, +    void UploadMemory(const StagingBufferMap& map,                        std::span<const VideoCommon::BufferImageCopy> copies);      void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, @@ -222,7 +196,8 @@ public:      void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,                          std::span<const VideoCommon::BufferImageCopy> copies); -    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); +    void DownloadMemory(StagingBufferMap& map, +                        std::span<const VideoCommon::BufferImageCopy> copies);      GLuint StorageHandle() noexcept; diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2c7ac210b..544982d18 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -19,6 +19,7 @@  #include "video_core/host_shaders/pitch_unswizzle_comp.h"  #include "video_core/renderer_opengl/gl_shader_manager.h"  #include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"  #include "video_core/renderer_opengl/gl_texture_cache.h"  #include "video_core/renderer_opengl/util_shaders.h"  #include "video_core/texture_cache/accelerated_swizzle.h" @@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)  UtilShaders::~UtilShaders() = default; -void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, +void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,                               std::span<const VideoCommon::SwizzleParameters> swizzles) {      static constexpr GLuint BINDING_INPUT_BUFFER = 0;      static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; @@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,      program_manager.RestoreGuestCompute();  } -void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,                                        std::span<const SwizzleParameters> swizzles) {      static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};      static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; @@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,      program_manager.RestoreGuestCompute();  } -void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,                                        std::span<const SwizzleParameters> swizzles) {      static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; @@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,      program_manager.RestoreGuestCompute();  } -void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, +void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,                                std::span<const SwizzleParameters> swizzles) {      static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};      static constexpr GLuint BINDING_INPUT_BUFFER = 0; diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 9013808e7..feecd404c 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -16,23 +16,23 @@ namespace OpenGL {  class Image;  class ProgramManager; -struct ImageBufferMap; +struct StagingBufferMap;  class UtilShaders {  public:      explicit UtilShaders(ProgramManager& program_manager);      ~UtilShaders(); -    void ASTCDecode(Image& image, const ImageBufferMap& map, +    void ASTCDecode(Image& image, const StagingBufferMap& map,                      std::span<const VideoCommon::SwizzleParameters> swizzles); -    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, +    void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,                               std::span<const VideoCommon::SwizzleParameters> swizzles); -    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, +    void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,                               std::span<const VideoCommon::SwizzleParameters> swizzles); -    void PitchUpload(Image& image, const ImageBufferMap& map, +    void PitchUpload(Image& image, const StagingBufferMap& map,                       std::span<const VideoCommon::SwizzleParameters> swizzles);      void CopyBC4(Image& dst_image, Image& src_image, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 794dd0758..92b4f7859 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -157,6 +157,7 @@ struct BufferCacheParams {      static constexpr bool USE_MEMORY_MAPS = true;      static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;      static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; +    static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;  };  using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cf082c5d..c7f7448e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -850,15 +850,11 @@ void TextureCache<P>::PopAsyncFlushes() {  template <class P>  ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) {      const ImageInfo dst_info(operand); -    const ImageId dst_id = FindDMAImage(dst_info, operand.address); -    if (!dst_id) { -        return NULL_IMAGE_ID; -    } -    auto& image = slot_images[dst_id]; -    if (False(image.flags & ImageFlagBits::GpuModified)) { -        // No need to waste time on an image that's synced with guest +    const ImageId image_id = FindDMAImage(dst_info, operand.address); +    if (!image_id) {          return NULL_IMAGE_ID;      } +    auto& image = slot_images[image_id];      if (!is_upload && !image.info.dma_downloaded) {          // Force a full sync.          image.info.dma_downloaded = true; @@ -868,7 +864,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo      if (!base) {          return NULL_IMAGE_ID;      } -    return dst_id; +    return image_id;  }  template <class P> diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b692b4be4..d62a103a1 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -85,7 +85,6 @@  // Define extensions which must be supported.  #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME)                                            \ -    EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME)                                             \      EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME)                                 \      EXTENSION_NAME(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME)                                        \      EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME)                             \ @@ -105,6 +104,7 @@      EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME)                                 \      EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME)                                 \      EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME)                                       \ +    EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME)                                             \      EXTENSION_NAME(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME)                               \      EXTENSION_NAME(VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME)                               \      EXTENSION_NAME(VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME)                                           \ @@ -141,9 +141,6 @@      FEATURE_NAME(features, vertexPipelineStoresAndAtomics)                                         \      FEATURE_NAME(features, wideLines)                                                              \      FEATURE_NAME(host_query_reset, hostQueryReset)                                                 \ -    FEATURE_NAME(robustness2, nullDescriptor)                                                      \ -    FEATURE_NAME(robustness2, robustBufferAccess2)                                                 \ -    FEATURE_NAME(robustness2, robustImageAccess2)                                                  \      FEATURE_NAME(shader_demote_to_helper_invocation, shaderDemoteToHelperInvocation)               \      FEATURE_NAME(shader_draw_parameters, shaderDrawParameters)                                     \      FEATURE_NAME(variable_pointer, variablePointers)                                               \ @@ -156,6 +153,9 @@      FEATURE_NAME(index_type_uint8, indexTypeUint8)                                                 \      FEATURE_NAME(primitive_topology_list_restart, primitiveTopologyListRestart)                    \      FEATURE_NAME(provoking_vertex, provokingVertexLast)                                            \ +    FEATURE_NAME(robustness2, nullDescriptor)                                                      \ +    FEATURE_NAME(robustness2, robustBufferAccess2)                                                 \ +    FEATURE_NAME(robustness2, robustImageAccess2)                                                  \      FEATURE_NAME(shader_float16_int8, shaderFloat16)                                               \      FEATURE_NAME(shader_float16_int8, shaderInt8)                                                  \      FEATURE_NAME(timeline_semaphore, timelineSemaphore)                                            \ diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 896863f87..0463ac8b9 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -42,6 +42,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {          Settings::values.use_vulkan_driver_pipeline_cache.GetValue());      ui->enable_compute_pipelines_checkbox->setChecked(          Settings::values.enable_compute_pipelines.GetValue()); +    ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue());      if (Settings::IsConfiguringGlobal()) {          ui->gpu_accuracy->setCurrentIndex( @@ -91,6 +92,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {      ConfigurationShared::ApplyPerGameSetting(&Settings::values.enable_compute_pipelines,                                               ui->enable_compute_pipelines_checkbox,                                               enable_compute_pipelines); +    ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate, +                                             ui->use_video_framerate_checkbox, use_video_framerate);  }  void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -125,6 +128,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {              Settings::values.max_anisotropy.UsingGlobal());          ui->enable_compute_pipelines_checkbox->setEnabled(              Settings::values.enable_compute_pipelines.UsingGlobal()); +        ui->use_video_framerate_checkbox->setEnabled( +            Settings::values.use_video_framerate.UsingGlobal());          return;      } @@ -149,6 +154,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {      ConfigurationShared::SetColoredTristate(ui->enable_compute_pipelines_checkbox,                                              Settings::values.enable_compute_pipelines,                                              enable_compute_pipelines); +    ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox, +                                            Settings::values.use_video_framerate, +                                            use_video_framerate);      ConfigurationShared::SetColoredComboBox(          ui->gpu_accuracy, ui->label_gpu_accuracy,          static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 1c7b636b9..a4dc8ceb0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -47,6 +47,7 @@ private:      ConfigurationShared::CheckState use_fast_gpu_time;      ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;      ConfigurationShared::CheckState enable_compute_pipelines; +    ConfigurationShared::CheckState use_video_framerate;      const Core::System& system;  }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 37757a918..e7f0ef6be 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -192,6 +192,16 @@ Compute pipelines are always enabled on all other drivers.</string>           </widget>          </item>          <item> +         <widget class="QCheckBox" name="use_video_framerate_checkbox"> +          <property name="toolTip"> +           <string>Run the game at normal speed during video playback, even when the framerate is unlocked.</string> +          </property> +          <property name="text"> +           <string>Sync to framerate of video playback</string> +          </property> +         </widget> +        </item> +        <item>           <widget class="QWidget" name="af_layout" native="true">            <layout class="QHBoxLayout" name="horizontalLayout_1">             <property name="leftMargin"> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 82bce9a3a..145fea5f1 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -3491,6 +3491,7 @@ void GMainWindow::ResetWindowSize1080() {  void GMainWindow::OnConfigure() {      const auto old_theme = UISettings::values.theme;      const bool old_discord_presence = UISettings::values.enable_discord_presence.GetValue(); +    const auto old_language_index = Settings::values.language_index.GetValue();      Settings::SetConfiguringGlobal(true);      ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), *system, @@ -3559,7 +3560,7 @@ void GMainWindow::OnConfigure() {      emit UpdateThemedIcons();      const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); -    if (reload) { +    if (reload || Settings::values.language_index.GetValue() != old_language_index) {          game_list->PopulateAsync(UISettings::values.game_dirs);      } | 
