diff options
Diffstat (limited to 'src')
160 files changed, 1750 insertions, 1263 deletions
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt index f0a6753a9..b1771b424 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/activities/EmulationActivity.kt @@ -27,13 +27,13 @@ import android.view.MotionEvent import android.view.Surface import android.view.View import android.view.inputmethod.InputMethodManager +import android.widget.Toast import androidx.activity.viewModels import androidx.appcompat.app.AppCompatActivity import androidx.core.view.WindowCompat import androidx.core.view.WindowInsetsCompat import androidx.core.view.WindowInsetsControllerCompat import androidx.navigation.fragment.NavHostFragment -import kotlin.math.roundToInt import org.yuzu.yuzu_emu.NativeLibrary import org.yuzu.yuzu_emu.R import org.yuzu.yuzu_emu.databinding.ActivityEmulationBinding @@ -44,8 +44,10 @@ import org.yuzu.yuzu_emu.model.Game import org.yuzu.yuzu_emu.utils.ControllerMappingHelper import org.yuzu.yuzu_emu.utils.ForegroundService import org.yuzu.yuzu_emu.utils.InputHandler +import org.yuzu.yuzu_emu.utils.MemoryUtil import org.yuzu.yuzu_emu.utils.NfcReader import org.yuzu.yuzu_emu.utils.ThemeHelper +import kotlin.math.roundToInt class EmulationActivity : AppCompatActivity(), SensorEventListener { private lateinit var binding: ActivityEmulationBinding @@ -102,6 +104,19 @@ class EmulationActivity : AppCompatActivity(), SensorEventListener { inputHandler = InputHandler() inputHandler.initialize() + val memoryUtil = MemoryUtil(this) + if (memoryUtil.isLessThan(8, MemoryUtil.Gb)) { + Toast.makeText( + this, + getString( + R.string.device_memory_inadequate, + memoryUtil.getDeviceRAM(), + "8 ${getString(R.string.memory_gigabyte)}" + ), + Toast.LENGTH_LONG + ).show() + } + // Start a foreground service to prevent the app from getting killed in the background val 
startIntent = Intent(this, ForegroundService::class.java) startForegroundService(startIntent) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt index 6f8adbba5..5a36ffad4 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt @@ -68,79 +68,109 @@ class HomeSettingsFragment : Fragment() { override fun onViewCreated(view: View, savedInstanceState: Bundle?) { mainActivity = requireActivity() as MainActivity - val optionsList: MutableList<HomeSetting> = mutableListOf( - HomeSetting( - R.string.advanced_settings, - R.string.settings_description, - R.drawable.ic_settings - ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") }, - HomeSetting( - R.string.open_user_folder, - R.string.open_user_folder_description, - R.drawable.ic_folder_open - ) { openFileManager() }, - HomeSetting( - R.string.preferences_theme, - R.string.theme_and_color_description, - R.drawable.ic_palette - ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") }, - HomeSetting( - R.string.install_gpu_driver, - R.string.install_gpu_driver_description, - R.drawable.ic_exit - ) { driverInstaller() }, - HomeSetting( - R.string.install_amiibo_keys, - R.string.install_amiibo_keys_description, - R.drawable.ic_nfc - ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) }, - HomeSetting( - R.string.install_game_content, - R.string.install_game_content_description, - R.drawable.ic_system_update_alt - ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) }, - HomeSetting( - R.string.select_games_folder, - R.string.select_games_folder_description, - R.drawable.ic_add - ) { - mainActivity.getGamesDirectory.launch(Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data) - }, - HomeSetting( - R.string.manage_save_data, - 
R.string.import_export_saves_description, - R.drawable.ic_save - ) { - ImportExportSavesFragment().show( - parentFragmentManager, - ImportExportSavesFragment.TAG + val optionsList: MutableList<HomeSetting> = mutableListOf<HomeSetting>().apply { + add( + HomeSetting( + R.string.advanced_settings, + R.string.settings_description, + R.drawable.ic_settings + ) { SettingsActivity.launch(requireContext(), SettingsFile.FILE_NAME_CONFIG, "") } + ) + add( + HomeSetting( + R.string.open_user_folder, + R.string.open_user_folder_description, + R.drawable.ic_folder_open + ) { openFileManager() } + ) + add( + HomeSetting( + R.string.preferences_theme, + R.string.theme_and_color_description, + R.drawable.ic_palette + ) { SettingsActivity.launch(requireContext(), Settings.SECTION_THEME, "") } + ) + + if (GpuDriverHelper.supportsCustomDriverLoading()) { + add( + HomeSetting( + R.string.install_gpu_driver, + R.string.install_gpu_driver_description, + R.drawable.ic_exit + ) { driverInstaller() } ) - }, - HomeSetting( - R.string.install_prod_keys, - R.string.install_prod_keys_description, - R.drawable.ic_unlock - ) { mainActivity.getProdKey.launch(arrayOf("*/*")) }, - HomeSetting( - R.string.install_firmware, - R.string.install_firmware_description, - R.drawable.ic_firmware - ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) }, - HomeSetting( - R.string.share_log, - R.string.share_log_description, - R.drawable.ic_log - ) { shareLog() }, - HomeSetting( - R.string.about, - R.string.about_description, - R.drawable.ic_info_outline - ) { - exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true) - parentFragmentManager.primaryNavigationFragment?.findNavController() - ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment) } - ) + + add( + HomeSetting( + R.string.install_amiibo_keys, + R.string.install_amiibo_keys_description, + R.drawable.ic_nfc + ) { mainActivity.getAmiiboKey.launch(arrayOf("*/*")) } + ) + add( + HomeSetting( + R.string.install_game_content, + 
R.string.install_game_content_description, + R.drawable.ic_system_update_alt + ) { mainActivity.installGameUpdate.launch(arrayOf("*/*")) } + ) + add( + HomeSetting( + R.string.select_games_folder, + R.string.select_games_folder_description, + R.drawable.ic_add + ) { + mainActivity.getGamesDirectory.launch( + Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data + ) + } + ) + add( + HomeSetting( + R.string.manage_save_data, + R.string.import_export_saves_description, + R.drawable.ic_save + ) { + ImportExportSavesFragment().show( + parentFragmentManager, + ImportExportSavesFragment.TAG + ) + } + ) + add( + HomeSetting( + R.string.install_prod_keys, + R.string.install_prod_keys_description, + R.drawable.ic_unlock + ) { mainActivity.getProdKey.launch(arrayOf("*/*")) } + ) + add( + HomeSetting( + R.string.install_firmware, + R.string.install_firmware_description, + R.drawable.ic_firmware + ) { mainActivity.getFirmware.launch(arrayOf("application/zip")) } + ) + add( + HomeSetting( + R.string.share_log, + R.string.share_log_description, + R.drawable.ic_log + ) { shareLog() } + ) + add( + HomeSetting( + R.string.about, + R.string.about_description, + R.drawable.ic_info_outline + ) { + exitTransition = MaterialSharedAxis(MaterialSharedAxis.X, true) + parentFragmentManager.primaryNavigationFragment?.findNavController() + ?.navigate(R.id.action_homeSettingsFragment_to_aboutFragment) + } + ) + } if (!BuildConfig.PREMIUM) { optionsList.add( diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt new file mode 100644 index 000000000..b29b627e9 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/LongMessageDialogFragment.kt @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.fragments + +import android.app.Dialog +import 
android.content.Intent +import android.net.Uri +import android.os.Bundle +import androidx.fragment.app.DialogFragment +import com.google.android.material.dialog.MaterialAlertDialogBuilder +import org.yuzu.yuzu_emu.R + +class LongMessageDialogFragment : DialogFragment() { + override fun onCreateDialog(savedInstanceState: Bundle?): Dialog { + val titleId = requireArguments().getInt(TITLE) + val description = requireArguments().getString(DESCRIPTION) + val helpLinkId = requireArguments().getInt(HELP_LINK) + + val dialog = MaterialAlertDialogBuilder(requireContext()) + .setPositiveButton(R.string.close, null) + .setTitle(titleId) + .setMessage(description) + + if (helpLinkId != 0) { + dialog.setNeutralButton(R.string.learn_more) { _, _ -> + openLink(getString(helpLinkId)) + } + } + + return dialog.show() + } + + private fun openLink(link: String) { + val intent = Intent(Intent.ACTION_VIEW, Uri.parse(link)) + startActivity(intent) + } + + companion object { + const val TAG = "LongMessageDialogFragment" + + private const val TITLE = "Title" + private const val DESCRIPTION = "Description" + private const val HELP_LINK = "Link" + + fun newInstance( + titleId: Int, + description: String, + helpLinkId: Int = 0 + ): LongMessageDialogFragment { + val dialog = LongMessageDialogFragment() + val bundle = Bundle() + bundle.apply { + putInt(TITLE, titleId) + putString(DESCRIPTION, description) + putInt(HELP_LINK, helpLinkId) + } + dialog.arguments = bundle + return dialog + } + } +} diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt index cc1d87f1b..3086cfad3 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt @@ -4,6 +4,7 @@ package org.yuzu.yuzu_emu.ui.main import android.content.Intent +import android.net.Uri import android.os.Bundle import android.view.View 
import android.view.ViewGroup.MarginLayoutParams @@ -42,6 +43,7 @@ import org.yuzu.yuzu_emu.features.settings.model.SettingsViewModel import org.yuzu.yuzu_emu.features.settings.ui.SettingsActivity import org.yuzu.yuzu_emu.features.settings.utils.SettingsFile import org.yuzu.yuzu_emu.fragments.IndeterminateProgressDialogFragment +import org.yuzu.yuzu_emu.fragments.LongMessageDialogFragment import org.yuzu.yuzu_emu.fragments.MessageDialogFragment import org.yuzu.yuzu_emu.model.GamesViewModel import org.yuzu.yuzu_emu.model.HomeViewModel @@ -481,62 +483,110 @@ class MainActivity : AppCompatActivity(), ThemeProvider { } } - val installGameUpdate = - registerForActivityResult(ActivityResultContracts.OpenDocument()) { - if (it == null) { - return@registerForActivityResult - } - + val installGameUpdate = registerForActivityResult( + ActivityResultContracts.OpenMultipleDocuments() + ) { documents: List<Uri> -> + if (documents.isNotEmpty()) { IndeterminateProgressDialogFragment.newInstance( this@MainActivity, R.string.install_game_content ) { - val result = NativeLibrary.installFileToNand(it.toString()) + var installSuccess = 0 + var installOverwrite = 0 + var errorBaseGame = 0 + var errorExtension = 0 + var errorOther = 0 + var errorTotal = 0 lifecycleScope.launch { - withContext(Dispatchers.Main) { - when (result) { + documents.forEach { + when (NativeLibrary.installFileToNand(it.toString())) { NativeLibrary.InstallFileToNandResult.Success -> { - Toast.makeText( - applicationContext, - R.string.install_game_content_success, - Toast.LENGTH_SHORT - ).show() + installSuccess += 1 } NativeLibrary.InstallFileToNandResult.SuccessFileOverwritten -> { - Toast.makeText( - applicationContext, - R.string.install_game_content_success_overwrite, - Toast.LENGTH_SHORT - ).show() + installOverwrite += 1 } NativeLibrary.InstallFileToNandResult.ErrorBaseGame -> { - MessageDialogFragment.newInstance( - R.string.install_game_content_failure, - R.string.install_game_content_failure_base - 
).show(supportFragmentManager, MessageDialogFragment.TAG) + errorBaseGame += 1 } NativeLibrary.InstallFileToNandResult.ErrorFilenameExtension -> { - MessageDialogFragment.newInstance( - R.string.install_game_content_failure, - R.string.install_game_content_failure_file_extension, - R.string.install_game_content_help_link - ).show(supportFragmentManager, MessageDialogFragment.TAG) + errorExtension += 1 } else -> { - MessageDialogFragment.newInstance( - R.string.install_game_content_failure, - R.string.install_game_content_failure_description, - R.string.install_game_content_help_link - ).show(supportFragmentManager, MessageDialogFragment.TAG) + errorOther += 1 } } } + withContext(Dispatchers.Main) { + val separator = System.getProperty("line.separator") ?: "\n" + val installResult = StringBuilder() + if (installSuccess > 0) { + installResult.append( + getString( + R.string.install_game_content_success_install, + installSuccess + ) + ) + installResult.append(separator) + } + if (installOverwrite > 0) { + installResult.append( + getString( + R.string.install_game_content_success_overwrite, + installOverwrite + ) + ) + installResult.append(separator) + } + errorTotal = errorBaseGame + errorExtension + errorOther + if (errorTotal > 0) { + installResult.append(separator) + installResult.append( + getString( + R.string.install_game_content_failed_count, + errorTotal + ) + ) + installResult.append(separator) + if (errorBaseGame > 0) { + installResult.append(separator) + installResult.append( + getString(R.string.install_game_content_failure_base) + ) + installResult.append(separator) + } + if (errorExtension > 0) { + installResult.append(separator) + installResult.append( + getString(R.string.install_game_content_failure_file_extension) + ) + installResult.append(separator) + } + if (errorOther > 0) { + installResult.append( + getString(R.string.install_game_content_failure_description) + ) + installResult.append(separator) + } + LongMessageDialogFragment.newInstance( + 
R.string.install_game_content_failure, + installResult.toString().trim(), + R.string.install_game_content_help_link + ).show(supportFragmentManager, LongMessageDialogFragment.TAG) + } else { + LongMessageDialogFragment.newInstance( + R.string.install_game_content_success, + installResult.toString().trim() + ).show(supportFragmentManager, LongMessageDialogFragment.TAG) + } + } } - return@newInstance result + return@newInstance installSuccess + installOverwrite + errorTotal }.show(supportFragmentManager, IndeterminateProgressDialogFragment.TAG) } + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt index dad159481..1d4695a2a 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/GpuDriverHelper.kt @@ -113,6 +113,8 @@ object GpuDriverHelper { initializeDriverParameters(context) } + external fun supportsCustomDriverLoading(): Boolean + // Parse the custom driver metadata to retrieve the name. val customDriverName: String? 
get() { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt new file mode 100644 index 000000000..18e5fa0b0 --- /dev/null +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/utils/MemoryUtil.kt @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +package org.yuzu.yuzu_emu.utils + +import android.app.ActivityManager +import android.content.Context +import org.yuzu.yuzu_emu.R +import java.util.Locale + +class MemoryUtil(val context: Context) { + + private val Long.floatForm: String + get() = String.format(Locale.ROOT, "%.2f", this.toDouble()) + + private fun bytesToSizeUnit(size: Long): String { + return when { + size < Kb -> "${size.floatForm} ${context.getString(R.string.memory_byte)}" + size < Mb -> "${(size / Kb).floatForm} ${context.getString(R.string.memory_kilobyte)}" + size < Gb -> "${(size / Mb).floatForm} ${context.getString(R.string.memory_megabyte)}" + size < Tb -> "${(size / Gb).floatForm} ${context.getString(R.string.memory_gigabyte)}" + size < Pb -> "${(size / Tb).floatForm} ${context.getString(R.string.memory_terabyte)}" + size < Eb -> "${(size / Pb).floatForm} ${context.getString(R.string.memory_petabyte)}" + else -> "${(size / Eb).floatForm} ${context.getString(R.string.memory_exabyte)}" + } + } + + private val totalMemory = + with(context.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager) { + val memInfo = ActivityManager.MemoryInfo() + getMemoryInfo(memInfo) + memInfo.totalMem + } + + fun isLessThan(minimum: Int, size: Long): Boolean { + return when (size) { + Kb -> totalMemory < Mb && totalMemory < minimum + Mb -> totalMemory < Gb && (totalMemory / Mb) < minimum + Gb -> totalMemory < Tb && (totalMemory / Gb) < minimum + Tb -> totalMemory < Pb && (totalMemory / Tb) < minimum + Pb -> totalMemory < Eb && (totalMemory / Pb) < minimum + Eb -> totalMemory / Eb < minimum + 
else -> totalMemory < Kb && totalMemory < minimum + } + } + + fun getDeviceRAM(): String { + return bytesToSizeUnit(totalMemory) + } + + companion object { + const val Kb: Long = 1024 + const val Mb = Kb * 1024 + const val Gb = Mb * 1024 + const val Tb = Gb * 1024 + const val Pb = Tb * 1024 + const val Eb = Pb * 1024 + } +} diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index f9617202b..f4fed0886 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -237,6 +237,7 @@ public: m_software_keyboard = android_keyboard.get(); m_system.SetShuttingDown(false); m_system.ApplySettings(); + Settings::LogSettings(); m_system.HIDCore().ReloadInputDevices(); m_system.SetAppletFrontendSet({ nullptr, // Amiibo Settings @@ -560,6 +561,26 @@ void JNICALL Java_org_yuzu_yuzu_1emu_NativeLibrary_initializeGpuDriver( GetJString(env, custom_driver_name), GetJString(env, file_redirect_dir)); } +[[maybe_unused]] static bool CheckKgslPresent() { + constexpr auto KgslPath{"/dev/kgsl-3d0"}; + + return access(KgslPath, F_OK) == 0; +} + +[[maybe_unused]] bool SupportsCustomDriver() { + return android_get_device_api_level() >= 28 && CheckKgslPresent(); +} + +jboolean JNICALL Java_org_yuzu_yuzu_1emu_utils_GpuDriverHelper_supportsCustomDriverLoading( + [[maybe_unused]] JNIEnv* env, [[maybe_unused]] jobject instance) { +#ifdef ARCHITECTURE_arm64 + // If the KGSL device exists custom drivers can be loaded using adrenotools + return SupportsCustomDriver(); +#else + return false; +#endif +} + jboolean Java_org_yuzu_yuzu_1emu_NativeLibrary_reloadKeys(JNIEnv* env, [[maybe_unused]] jclass clazz) { Core::Crypto::KeyManager::Instance().ReloadKeys(); diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index cc1d8c39d..21805d274 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -104,12 
+104,14 @@ <string name="share_log_missing">No log file found</string> <string name="install_game_content">Install game content</string> <string name="install_game_content_description">Install game updates or DLC</string> - <string name="install_game_content_failure">Error installing file to NAND</string> - <string name="install_game_content_failure_description">Game content installation failed. Please ensure content is valid and that the prod.keys file is installed.</string> - <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts. Please select an update or DLC instead.</string> - <string name="install_game_content_failure_file_extension">The selected file type is not supported. Only NSP and XCI content is supported for this action. Please verify the game content is valid.</string> - <string name="install_game_content_success">Game content installed successfully</string> - <string name="install_game_content_success_overwrite">Game content was overwritten successfully</string> + <string name="install_game_content_failure">Error installing file(s) to NAND</string> + <string name="install_game_content_failure_description">Please ensure content(s) are valid and that the prod.keys file is installed.</string> + <string name="install_game_content_failure_base">Installation of base games isn\'t permitted in order to avoid possible conflicts.</string> + <string name="install_game_content_failure_file_extension">Only NSP and XCI content is supported. 
Please verify the game content(s) are valid.</string> + <string name="install_game_content_failed_count">%1$d installation error(s)</string> + <string name="install_game_content_success">Game content(s) installed successfully</string> + <string name="install_game_content_success_install">%1$d installed successfully</string> + <string name="install_game_content_success_overwrite">%1$d overwritten successfully</string> <string name="install_game_content_help_link">https://yuzu-emu.org/help/quickstart/#dumping-installed-updates</string> <!-- About screen strings --> @@ -270,6 +272,7 @@ <string name="fatal_error">Fatal Error</string> <string name="fatal_error_message">A fatal error occurred. Check the log for details.\nContinuing emulation may result in crashes and bugs.</string> <string name="performance_warning">Turning off this setting will significantly reduce emulation performance! For the best experience, it is recommended that you leave this setting enabled.</string> + <string name="device_memory_inadequate">Device RAM: %1$s\nRecommended: %2$s</string> <!-- Region Names --> <string name="region_japan">Japan</string> @@ -300,6 +303,15 @@ <string name="language_traditional_chinese">Traditional Chinese (正體中文)</string> <string name="language_brazilian_portuguese">Brazilian Portuguese (Português do Brasil)</string> + <!-- Memory Sizes --> + <string name="memory_byte">Byte</string> + <string name="memory_kilobyte">KB</string> + <string name="memory_megabyte">MB</string> + <string name="memory_gigabyte">GB</string> + <string name="memory_terabyte">TB</string> + <string name="memory_petabyte">PB</string> + <string name="memory_exabyte">EB</string> + <!-- Renderer APIs --> <string name="renderer_vulkan">Vulkan</string> <string name="renderer_none">None</string> diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h index 15082f6c6..5d8ed0ef7 100644 --- a/src/audio_core/device/audio_buffers.h +++ 
b/src/audio_core/device/audio_buffers.h @@ -7,6 +7,7 @@ #include <mutex> #include <span> #include <vector> +#include <boost/container/static_vector.hpp> #include "audio_buffer.h" #include "audio_core/device/device_session.h" @@ -48,7 +49,7 @@ public: * * @param out_buffers - The buffers which were registered. */ - void RegisterBuffers(std::vector<AudioBuffer>& out_buffers) { + void RegisterBuffers(boost::container::static_vector<AudioBuffer, N>& out_buffers) { std::scoped_lock l{lock}; const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), BufferAppendLimit - registered_count)}; @@ -162,7 +163,8 @@ public: * @param max_buffers - Maximum number of buffers to released. * @return The number of buffers released. */ - u32 GetRegisteredAppendedBuffers(std::vector<AudioBuffer>& buffers_flushed, u32 max_buffers) { + u32 GetRegisteredAppendedBuffers( + boost::container::static_vector<AudioBuffer, N>& buffers_flushed, u32 max_buffers) { std::scoped_lock l{lock}; if (registered_count + appended_count == 0) { return 0; @@ -270,7 +272,7 @@ public: */ bool FlushBuffers(u32& buffers_released) { std::scoped_lock l{lock}; - std::vector<AudioBuffer> buffers_flushed{}; + boost::container::static_vector<AudioBuffer, N> buffers_flushed{}; buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit); diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp index b5c0ef0e6..86811fcb8 100644 --- a/src/audio_core/device/device_session.cpp +++ b/src/audio_core/device/device_session.cpp @@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() { } } -void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const { +void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) { for (const auto& buffer : buffers) { Sink::SinkBuffer new_buffer{ .frames = buffer.size / (channel_count * sizeof(s16)), @@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const { 
.consumed = false, }; + tmp_samples.resize_destructive(buffer.size / sizeof(s16)); if (type == Sink::StreamType::In) { - std::vector<s16> samples{}; - stream->AppendBuffer(new_buffer, samples); + stream->AppendBuffer(new_buffer, tmp_samples); } else { - std::vector<s16> samples(buffer.size / sizeof(s16)); - system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size); - stream->AppendBuffer(new_buffer, samples); + system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(), + buffer.size); + stream->AppendBuffer(new_buffer, tmp_samples); } } } diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h index 75f766c68..7d52f362d 100644 --- a/src/audio_core/device/device_session.h +++ b/src/audio_core/device/device_session.h @@ -10,6 +10,7 @@ #include "audio_core/common/common.h" #include "audio_core/sink/sink.h" +#include "common/scratch_buffer.h" #include "core/hle/service/audio/errors.h" namespace Core { @@ -62,7 +63,7 @@ public: * * @param buffers - The buffers to play. */ - void AppendBuffers(std::span<const AudioBuffer> buffers) const; + void AppendBuffers(std::span<const AudioBuffer> buffers); /** * (Audio In only) Pop samples from the backend, and write them back to this buffer's address. @@ -146,8 +147,8 @@ private: std::shared_ptr<Core::Timing::EventType> thread_event; /// Is this session initialised? 
bool initialized{}; - /// Buffer queue - std::vector<AudioBuffer> buffer_queue{}; + /// Temporary sample buffer + Common::ScratchBuffer<s16> tmp_samples{}; }; } // namespace AudioCore diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp index e23e51758..579129121 100644 --- a/src/audio_core/in/audio_in_system.cpp +++ b/src/audio_core/in/audio_in_system.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <mutex> + #include "audio_core/audio_event.h" #include "audio_core/audio_manager.h" #include "audio_core/in/audio_in_system.h" @@ -89,7 +90,7 @@ Result System::Start() { session->Start(); state = State::Started; - std::vector<AudioBuffer> buffers_to_flush{}; + boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{}; buffers.RegisterBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush); session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); @@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) { void System::RegisterBuffers() { if (state == State::Started) { - std::vector<AudioBuffer> registered_buffers{}; + boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{}; buffers.RegisterBuffers(registered_buffers); session->AppendBuffers(registered_buffers); } diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp index bd13f7219..0adf64bd3 100644 --- a/src/audio_core/out/audio_out_system.cpp +++ b/src/audio_core/out/audio_out_system.cpp @@ -89,7 +89,7 @@ Result System::Start() { session->Start(); state = State::Started; - std::vector<AudioBuffer> buffers_to_flush{}; + boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{}; buffers.RegisterBuffers(buffers_to_flush); session->AppendBuffers(buffers_to_flush); session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); @@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 
tag) { void System::RegisterBuffers() { if (state == State::Started) { - std::vector<AudioBuffer> registered_buffers{}; + boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{}; buffers.RegisterBuffers(registered_buffers); session->AppendBuffers(registered_buffers); } diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp index 74772fc50..b1db31e93 100644 --- a/src/audio_core/renderer/adsp/adsp.cpp +++ b/src/audio_core/renderer/adsp/adsp.cpp @@ -7,7 +7,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp index 8bc39f9f9..9ca716b60 100644 --- a/src/audio_core/renderer/adsp/audio_renderer.cpp +++ b/src/audio_core/renderer/adsp/audio_renderer.cpp @@ -13,7 +13,6 @@ #include "common/thread.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); @@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); + // 0.12 seconds (2304000 / 19200000) constexpr u64 max_process_time{2'304'000ULL}; while (!stop_token.stop_requested()) { @@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { u64 max_time{max_process_time}; if (index == 1 && command_buffer.applet_resource_user_id == mailbox->GetCommandBuffer(0).applet_resource_user_id) { - max_time = max_process_time - - Core::Timing::CyclesToNs(render_times_taken[0]).count(); + max_time = max_process_time - render_times_taken[0]; if (render_times_taken[0] > max_process_time) { max_time = 0; } diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp 
b/src/audio_core/renderer/adsp/command_list_processor.cpp index 7a300d216..3a0f1ae38 100644 --- a/src/audio_core/renderer/adsp/command_list_processor.cpp +++ b/src/audio_core/renderer/adsp/command_list_processor.cpp @@ -9,7 +9,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index ff5d31bd6..f45933203 100644 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -8,6 +8,7 @@ #include "audio_core/renderer/command/resample/resample.h" #include "common/fixed_point.h" #include "common/logging/log.h" +#include "common/scratch_buffer.h" #include "core/memory.h" namespace AudioCore::AudioRenderer { @@ -27,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4}; template <typename T> static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, const DecodeArg& req) { + std::array<T, TempBufferSize> tmp_samples{}; constexpr s32 min{std::numeric_limits<s16>::min()}; constexpr s32 max{std::numeric_limits<s16>::max()}; @@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, const u64 size{channel_count * samples_to_decode}; const u64 size_bytes{size * sizeof(T)}; - std::vector<T> samples(size); - memory.ReadBlockUnsafe(source, samples.data(), size_bytes); + memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); if constexpr (std::is_floating_point_v<T>) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * + auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits<s16>::max())}; out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); } } 
else { for (u32 i = 0; i < samples_to_decode; i++) { - out_buffer[i] = samples[i * channel_count + req.target_channel]; + out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; } } } break; @@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, } const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; - std::vector<T> samples(samples_to_decode); - memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T)); + memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); if constexpr (std::is_floating_point_v<T>) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * + auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits<s16>::max())}; out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); } } else { - std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); + std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); } break; } @@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, */ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, const DecodeArg& req) { + std::array<u8, TempBufferSize> wavebuffer{}; constexpr u32 SamplesPerFrame{14}; constexpr u32 NibblesPerFrame{16}; @@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, } const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; - std::vector<u8> wavebuffer(size); - memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), - wavebuffer.size()); + memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); auto context{req.adpcm_context}; auto header{context->header}; @@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& 
memory, const DecodeFromWaveBuf u32 offset{voice_state.offset}; auto output_buffer{args.output}; - std::vector<s16> temp_buffer(TempBufferSize, 0); + std::array<s16, TempBufferSize> temp_buffer{}; while (remaining_sample_count > 0) { const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp index 7229618e8..ee9b68d5b 100644 --- a/src/audio_core/renderer/command/effect/compressor.cpp +++ b/src/audio_core/renderer/command/effect/compressor.cpp @@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2& static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, CompressorInfo::State& state, bool enabled, - std::vector<std::span<const s32>> input_buffers, - std::vector<std::span<s32>> output_buffers, u32 sample_count) { + std::span<std::span<const s32>> input_buffers, + std::span<std::span<s32>> output_buffers, u32 sample_count) { if (enabled) { auto state_00{state.unk_00}; auto state_04{state.unk_04}; @@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp index a4e408d40..e536cbb1e 100644 --- a/src/audio_core/renderer/command/effect/delay.cpp +++ b/src/audio_core/renderer/command/effect/delay.cpp @@ -51,7 +51,7 @@ 
static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); state.delay_lines[channel].sample_count = sample_count.to_int_floor(); state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); - if (state.delay_lines[channel].buffer.size() == 0) { + if (state.delay_lines[channel].sample_count == 0) { state.delay_lines[channel].buffer.push_back(0); } state.delay_lines[channel].buffer_pos = 0; @@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params, */ template <size_t NumChannels> static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - std::vector<std::span<const s32>>& inputs, - std::vector<std::span<s32>>& outputs, const u32 sample_count) { + std::span<std::span<const s32>> inputs, std::span<std::span<s32>> outputs, + const u32 sample_count) { for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{}; for (u32 channel = 0; channel < NumChannels; channel++) { @@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St * @param sample_count - Number of samples to process. 
*/ static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, - const bool enabled, std::vector<std::span<const s32>>& inputs, - std::vector<std::span<s32>>& outputs, const u32 sample_count) { + const bool enabled, std::span<std::span<const s32>> inputs, + std::span<std::span<s32>> outputs, const u32 sample_count) { if (!IsChannelCountValid(params.channel_count)) { LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); @@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce } void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (s16 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp index 27d8b9844..d2bfb67cc 100644 --- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp +++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp @@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& } void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git 
a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp index e8fb0e2fc..4161a9821 100644 --- a/src/audio_core/renderer/command/effect/light_limiter.cpp +++ b/src/audio_core/renderer/command/effect/light_limiter.cpp @@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio */ static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, LightLimiterInfo::State& state, const bool enabled, - std::vector<std::span<const s32>>& inputs, - std::vector<std::span<s32>>& outputs, const u32 sample_count, + std::span<std::span<const s32>> inputs, + std::span<std::span<s32>> outputs, const u32 sample_count, LightLimiterInfo::StatisticsInternal* statistics) { constexpr s64 min{std::numeric_limits<s32>::min()}; constexpr s64 max{std::numeric_limits<s32>::max()}; @@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, @@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP } void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; 
i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 8b9b65214..fc2f15a5e 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine& */ template <size_t NumChannels> static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - std::vector<std::span<const s32>>& inputs, - std::vector<std::span<s32>>& outputs, const u32 sample_count) { + std::span<std::span<const s32>> inputs, + std::span<std::span<s32>> outputs, const u32 sample_count) { static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever * @param sample_count - Number of samples to process. 
*/ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, - const bool enabled, std::vector<std::span<const s32>>& inputs, - std::vector<std::span<s32>>& outputs, const u32 sample_count) { + const bool enabled, std::span<std::span<const s32>> inputs, + std::span<std::span<s32>> outputs, const u32 sample_count) { if (enabled) { switch (params.channel_count) { case 0: @@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc } void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { - std::vector<std::span<const s32>> input_buffers(parameter.channel_count); - std::vector<std::span<s32>> output_buffers(parameter.channel_count); + std::array<std::span<const s32>, MaxChannels> input_buffers{}; + std::array<std::span<s32>, MaxChannels> output_buffers{}; for (u32 i = 0; i < parameter.channel_count; i++) { input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp index 985958b03..4a881547f 100644 --- a/src/audio_core/renderer/command/performance/performance.cpp +++ b/src/audio_core/renderer/command/performance/performance.cpp @@ -5,7 +5,6 @@ #include "audio_core/renderer/command/performance/performance.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::AudioRenderer { @@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) { auto base{entry_address.translated_address}; if (state == PerformanceState::Start) { auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)}; - *start_time_ptr = static_cast<u32>( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *start_time_ptr = + 
static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); } else if (state == PerformanceState::Stop) { auto processed_time_ptr{ reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)}; auto entry_count_ptr{ reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)}; - *processed_time_ptr = static_cast<u32>( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *processed_time_ptr = + static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); (*entry_count_ptr)++; } } diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp index ded5afc94..e2ce59792 100644 --- a/src/audio_core/renderer/command/sink/circular_buffer.cpp +++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp @@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces constexpr s32 min{std::numeric_limits<s16>::min()}; constexpr s32 max{std::numeric_limits<s16>::max()}; - std::vector<s16> output(processor.sample_count); + std::array<s16, TargetSampleCount * MaxChannels> output{}; for (u32 channel = 0; channel < input_count; channel++) { auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, processor.sample_count)}; @@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces } processor.memory->WriteBlockUnsafe(address + pos, output.data(), - output.size() * sizeof(s16)); + processor.sample_count * sizeof(s16)); pos += static_cast<u32>(processor.sample_count * sizeof(s16)); if (pos >= size) { pos = 0; diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp index e88372a75..5f74dd7ad 100644 --- 
a/src/audio_core/renderer/command/sink/device.cpp +++ b/src/audio_core/renderer/command/sink/device.cpp @@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { .consumed{false}, }; - std::vector<s16> samples(out_buffer.frames * input_count); - + std::array<s16, TargetSampleCount * MaxChannels> samples{}; for (u32 channel = 0; channel < input_count; channel++) { const auto offset{inputs[channel] * out_buffer.frames}; @@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) { } out_buffer.tag = reinterpret_cast<u64>(samples.data()); - stream->AppendBuffer(out_buffer, samples); + stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count}); if (stream->IsPaused()) { stream->Start(); diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp index 35b748ede..3a18ae7c2 100644 --- a/src/audio_core/renderer/mix/mix_context.cpp +++ b/src/audio_core/renderer/mix/mix_context.cpp @@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) { return false; } - std::vector<s32> sorted_results{node_states.GetSortedResuls()}; - const auto result_size{std::min(count, static_cast<s32>(sorted_results.size()))}; + auto sorted_results{node_states.GetSortedResuls()}; + const auto result_size{std::min(count, static_cast<s32>(sorted_results.second))}; for (s32 i = 0; i < result_size; i++) { - sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; + sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]]; } CalcMixBufferOffset(); diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp index 1821a51e6..b7a44a54c 100644 --- a/src/audio_core/renderer/nodes/node_states.cpp +++ b/src/audio_core/renderer/nodes/node_states.cpp @@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const { return node_count; } -std::vector<s32> NodeStates::GetSortedResuls() const { - return 
{results.rbegin(), results.rbegin() + result_pos}; +std::pair<std::span<u32>::reverse_iterator, size_t> NodeStates::GetSortedResuls() const { + return {results.rbegin(), result_pos}; } } // namespace AudioCore::AudioRenderer diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h index 94b1d1254..e768cd4b5 100644 --- a/src/audio_core/renderer/nodes/node_states.h +++ b/src/audio_core/renderer/nodes/node_states.h @@ -175,7 +175,7 @@ public: * * @return Vector of nodes in reverse order. */ - std::vector<s32> GetSortedResuls() const; + std::pair<std::span<u32>::reverse_iterator, size_t> GetSortedResuls() const; private: /// Number of nodes in the graph diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp index 53b258c4f..a23627472 100644 --- a/src/audio_core/renderer/system.cpp +++ b/src/audio_core/renderer/system.cpp @@ -444,6 +444,7 @@ Result System::Update(std::span<const u8> input, std::span<u8> performance, std: std::scoped_lock l{lock}; const auto start_time{core.CoreTiming().GetClockTicks()}; + std::memset(output.data(), 0, output.size()); InfoUpdater info_updater(input, output, process_handle, behavior); diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h index 1215d3cd2..b6b43c93e 100644 --- a/src/audio_core/sink/null_sink.h +++ b/src/audio_core/sink/null_sink.h @@ -20,7 +20,7 @@ public: explicit NullSinkStreamImpl(Core::System& system_, StreamType type_) : SinkStream{system_, type_} {} ~NullSinkStreamImpl() override {} - void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {} + void AppendBuffer(SinkBuffer&, std::span<s16>) override {} std::vector<s16> ReleaseBuffer(u64) override { return {}; } diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index f44fedfd5..404dcd0e9 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -15,11 +15,10 @@ #include "common/settings.h" 
#include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::Sink { -void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { +void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) { if (type == StreamType::In) { queue.enqueue(buffer); queued_buffers++; @@ -67,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { static_cast<s16>(std::clamp(right_sample, min, max)); } - samples.resize(samples.size() / system_channels * device_channels); + samples = samples.subspan(0, samples.size() / system_channels * device_channels); } else if (system_channels == 2 && device_channels == 6) { // We need moar samples! Not all games will provide 6 channel audio. // TODO: Implement some upmixing here. Currently just passthrough, with other // channels left as silence. - std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0); + auto new_size = samples.size() / system_channels * device_channels; + tmp_samples.resize_destructive(new_size); - for (u32 read_index = 0, write_index = 0; read_index < samples.size(); + for (u32 read_index = 0, write_index = 0; read_index < new_size; read_index += system_channels, write_index += device_channels) { const auto left_sample{static_cast<s16>(std::clamp( static_cast<s32>( @@ -83,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { volume), min, max))}; - new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; + tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; const auto right_sample{static_cast<s16>(std::clamp( static_cast<s32>( @@ -91,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { volume), min, max))}; - new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; + tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = 
right_sample; } - samples = std::move(new_samples); + samples = std::span<s16>(tmp_samples); } else if (volume != 1.0f) { for (u32 i = 0; i < samples.size(); i++) { diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h index 41cbadc9c..98d72ace1 100644 --- a/src/audio_core/sink/sink_stream.h +++ b/src/audio_core/sink/sink_stream.h @@ -16,6 +16,7 @@ #include "common/polyfill_thread.h" #include "common/reader_writer_queue.h" #include "common/ring_buffer.h" +#include "common/scratch_buffer.h" #include "common/thread.h" namespace Core { @@ -170,7 +171,7 @@ public: * @param buffer - Audio buffer information to be queued. * @param samples - The s16 samples to be queue for playback. */ - virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples); + virtual void AppendBuffer(SinkBuffer& buffer, std::span<s16> samples); /** * Release a buffer. Audio In only, will fill a buffer with recorded samples. @@ -255,6 +256,8 @@ private: /// Signalled when ring buffer entries are consumed std::condition_variable_any release_cv; std::mutex release_mutex; + /// Temporary buffer for appending samples when upmixing + Common::ScratchBuffer<s16> tmp_samples{}; }; using SinkStreamPtr = std::unique_ptr<SinkStream>; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) x64/cpu_wait.h x64/native_clock.cpp x64/native_clock.h + x64/rdtsc.cpp + x64/rdtsc.h x64/xbyak_abi.h x64/xbyak_util.h ) diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp index 6d66c926d..1baf6d746 100644 --- a/src/common/fs/fs.cpp +++ b/src/common/fs/fs.cpp @@ -436,7 +436,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable if (True(filter & DirEntryFilter::File) && entry.status().type() == fs::file_type::regular) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = 
true; break; } @@ -444,7 +444,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable if (True(filter & DirEntryFilter::Directory) && entry.status().type() == fs::file_type::directory) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -493,7 +493,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, if (True(filter & DirEntryFilter::File) && entry.status().type() == fs::file_type::regular) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } @@ -501,7 +501,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, if (True(filter & DirEntryFilter::Directory) && entry.status().type() == fs::file_type::directory) { - if (!callback(entry.path())) { + if (!callback(entry)) { callback_error = true; break; } diff --git a/src/common/fs/fs_types.h b/src/common/fs/fs_types.h index 5a4090c19..900f85d24 100644 --- a/src/common/fs/fs_types.h +++ b/src/common/fs/fs_types.h @@ -66,6 +66,6 @@ DECLARE_ENUM_FLAG_OPERATORS(DirEntryFilter); * @returns A boolean value. * Return true to indicate whether the callback is successful, false otherwise. 
*/ -using DirEntryCallable = std::function<bool(const std::filesystem::path& path)>; +using DirEntryCallable = std::function<bool(const std::filesystem::directory_entry& entry)>; } // namespace Common::FS diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 4c328ab44..416680d44 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h @@ -9,6 +9,7 @@ #include <cstddef> #include <cstring> #include <new> +#include <span> #include <type_traits> #include <vector> @@ -53,7 +54,7 @@ public: return push_count; } - std::size_t Push(const std::vector<T>& input) { + std::size_t Push(const std::span<T> input) { return Push(input.data(), input.size()); } diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index a69a5a7af..6fe907953 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -3,6 +3,9 @@ #pragma once +#include <iterator> + +#include "common/concepts.h" #include "common/make_unique_for_overwrite.h" namespace Common { @@ -16,6 +19,12 @@ namespace Common { template <typename T> class ScratchBuffer { public: + using iterator = T*; + using const_iterator = const T*; + using value_type = T; + using element_type = T; + using iterator_category = std::contiguous_iterator_tag; + ScratchBuffer() = default; explicit ScratchBuffer(size_t initial_capacity) diff --git a/src/common/settings.h b/src/common/settings.h index 9682281b0..3aedf3850 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -483,6 +483,7 @@ struct Values { AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, "astc_recompression"}; SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; + SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"}; SwitchableSetting<u8> bg_red{0, "bg_red"}; SwitchableSetting<u8> bg_green{0, "bg_green"}; diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- 
a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. static constexpr s64 Multiplier = 100; // Convert Windows epoch to Unix epoch. - static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; + static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; FILETIME filetime; GetSystemTimePreciseAsFileTime(&filetime); return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + - static_cast<s64>(filetime.dwLowDateTime)) - - WindowsEpochToUnixEpochNS; + static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,88 +2,75 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/steady_clock.h" -#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" #endif namespace Common { class StandardWallClock final : public WallClock { public: - explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, - start_time{SteadyClock::Now()} {} + explicit StandardWallClock() : start_time{SteadyClock::Now()} {} - std::chrono::nanoseconds GetTimeNS() override { + std::chrono::nanoseconds GetTimeNS() const override { return SteadyClock::Now() - start_time; } - std::chrono::microseconds GetTimeUS() override { - return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); + std::chrono::microseconds GetTimeUS() const override { + return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); } - std::chrono::milliseconds GetTimeMS() override { - return 
std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); + std::chrono::milliseconds GetTimeMS() const override { + return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); } - u64 GetClockCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetCNTPCT() const override { + return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - u64 GetCPUCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetGPUTick() const override { + return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; } - void Pause([[maybe_unused]] bool is_paused) override { - // Do nothing in this clock type. + u64 GetHostTicksNow() const override { + return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); + } + + u64 GetHostTicksElapsed() const override { + return static_cast<u64>(GetTimeNS().count()); + } + + bool IsNative() const override { + return false; } private: SteadyClock::time_point start_time; }; +std::unique_ptr<WallClock> CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); - u64 rtsc_frequency = 0; - if (caps.invariant_tsc) { - rtsc_frequency = caps.tsc_frequency ? 
caps.tsc_frequency : EstimateRDTSCFrequency(); - } - // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: - // - A nanosecond - // - The emulated CPU frequency - // - The emulated clock counter frequency (CNTFRQ) - if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || - rtsc_frequency <= emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, - emulated_clock_frequency); + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { + return std::make_unique<X64::NativeClock>(caps.tsc_frequency); } else { - return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, - rtsc_frequency); + // Fallback to StandardWallClock if the hardware TSC + // - Is not invariant + // - Is not more precise than GPUTickFreq + return std::make_unique<StandardWallClock>(); } -} - #else - -std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); -} - + return std::make_unique<StandardWallClock>(); #endif +} -std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock() { + return std::make_unique<StandardWallClock>(); } } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #include <chrono> #include <memory> +#include <ratio> #include "common/common_types.h" @@ -12,50 +13,82 @@ namespace Common { class WallClock { public: - static constexpr u64 NS_RATIO = 1'000'000'000; - static constexpr u64 US_RATIO = 1'000'000; - static constexpr u64 
MS_RATIO = 1'000; + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz + static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz virtual ~WallClock() = default; - /// Returns current wall time in nanoseconds - [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; + /// @returns The time in nanoseconds since the construction of this clock. + virtual std::chrono::nanoseconds GetTimeNS() const = 0; - /// Returns current wall time in microseconds - [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; + /// @returns The time in microseconds since the construction of this clock. + virtual std::chrono::microseconds GetTimeUS() const = 0; - /// Returns current wall time in milliseconds - [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; + /// @returns The time in milliseconds since the construction of this clock. + virtual std::chrono::milliseconds GetTimeMS() const = 0; - /// Returns current wall time in emulated clock cycles - [[nodiscard]] virtual u64 GetClockCycles() = 0; + /// @returns The guest CNTPCT ticks since the construction of this clock. + virtual u64 GetCNTPCT() const = 0; - /// Returns current wall time in emulated cpu cycles - [[nodiscard]] virtual u64 GetCPUCycles() = 0; + /// @returns The guest GPU ticks since the construction of this clock. + virtual u64 GetGPUTick() const = 0; - virtual void Pause(bool is_paused) = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. + virtual u64 GetHostTicksNow() const = 0; - /// Tells if the wall clock, uses the host CPU's hardware clock - [[nodiscard]] bool IsNative() const { - return is_native; + /// @returns The raw host timer ticks since the construction of this clock. + virtual u64 GetHostTicksElapsed() const = 0; + + /// @returns Whether the clock directly uses the host's hardware clock. 
+ virtual bool IsNative() const = 0; + + static inline u64 NSToCNTPCT(u64 ns) { + return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; + } + + static inline u64 NSToGPUTick(u64 ns) { + return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + + // Cycle Timing + + static inline u64 CPUTickToNS(u64 cpu_tick) { + return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; + } + + static inline u64 CPUTickToUS(u64 cpu_tick) { + return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; + } + + static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { + return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; + } + + static inline u64 CPUTickToGPUTick(u64 cpu_tick) { + return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; } protected: - explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) - : emulated_cpu_frequency{emulated_cpu_frequency_}, - emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} + using NsRatio = std::nano; + using UsRatio = std::micro; + using MsRatio = std::milli; + + using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; + using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; + using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; + using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>; - u64 emulated_cpu_frequency; - u64 emulated_clock_frequency; + // Cycle Timing -private: - bool is_native; + using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>; + using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>; + using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>; + using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; }; -[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateOptimalClock(); -[[nodiscard]] std::unique_ptr<WallClock> 
CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr<WallClock> CreateStandardWallClock(); } // namespace Common diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/x64/cpu_detect.h" +#include "common/x64/rdtsc.h" #ifdef _WIN32 #include <windows.h> @@ -187,6 +188,8 @@ static CPUCaps Detect() { caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; + } else { + caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } } diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,19 +9,11 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} - __forceinline static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { _tpause(0, FencedRDTSC() + PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 
// For reference: diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,50 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <array> -#include <chrono> -#include <thread> - -#include "common/atomic_ops.h" -#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include <intrin.h> -#endif - -namespace Common { +namespace Common::X64 { -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} -#else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} -#endif +NativeClock::NativeClock(u64 rdtsc_frequency_) + : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, + ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, + gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} -template <u64 Nearest> -static u64 RoundToNearest(u64 value) { - const auto mod = value % Nearest; - return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; } -u64 EstimateRDTSCFrequency() { - // Discard the first result measuring the rdtsc. 
- FencedRDTSC(); - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - FencedRDTSC(); - - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 250 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{250}); - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return RoundToNearest<1000>(tsc_freq); +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ - rtsc_frequency_} { - // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. - time_sync_thread = std::jthread{[this](std::stop_token token) { - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 10 seconds. - if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { - return; - } - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. 
- const u64 timer_diff = static_cast<u64>( - std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - rtsc_frequency = tsc_freq; - CalculateAndSetFactors(); - }}; - - time_point.inner.last_measure = FencedRDTSC(); - time_point.inner.accumulated_ticks = 0U; - CalculateAndSetFactors(); +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; } -u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; - TimePoint current_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - const u64 current_measure = FencedRDTSC(); - u64 diff = current_measure - current_time_point.inner.last_measure; - diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) - new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure - ? 
current_measure - : current_time_point.inner.last_measure; - new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } -void NativeClock::Pause(bool is_paused) { - if (!is_paused) { - TimePoint current_time_point{}; - TimePoint new_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - new_time_point.pack = current_time_point.pack; - new_time_point.inner.last_measure = FencedRDTSC(); - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - } +u64 NativeClock::GetGPUTick() const { + return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); } -std::chrono::nanoseconds NativeClock::GetTimeNS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksNow() const { + return FencedRDTSC(); } -std::chrono::microseconds NativeClock::GetTimeUS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { + return FencedRDTSC() - start_ticks; } -std::chrono::milliseconds NativeClock::GetTimeMS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; +bool NativeClock::IsNative() const { + return true; } -u64 NativeClock::GetClockCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void 
NativeClock::CalculateAndSetFactors() { - ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); - clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); - cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,39 @@ #pragma once -#include "common/polyfill_thread.h" #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 { class NativeClock final : public WallClock { public: - explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_); + explicit NativeClock(u64 rdtsc_frequency_); - std::chrono::nanoseconds GetTimeNS() override; + std::chrono::nanoseconds GetTimeNS() const override; - std::chrono::microseconds GetTimeUS() override; + std::chrono::microseconds GetTimeUS() const override; - std::chrono::milliseconds GetTimeMS() override; + std::chrono::milliseconds GetTimeMS() const override; - u64 GetClockCycles() override; + u64 GetCNTPCT() const override; - u64 GetCPUCycles() override; + u64 GetGPUTick() const override; - void Pause(bool is_paused) override; + u64 GetHostTicksNow() const override; -private: - u64 GetRTSC(); - - void CalculateAndSetFactors(); - - union alignas(16) TimePoint { - TimePoint() : pack{} {} - u128 pack{}; - struct Inner { - u64 last_measure{}; - u64 accumulated_ticks{}; - } inner; - }; - - TimePoint time_point; + u64 GetHostTicksElapsed() const override; - // factors - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; - u64 ns_rtsc_factor{}; - u64 us_rtsc_factor{}; - u64 ms_rtsc_factor{}; + 
bool IsNative() const override; - u64 rtsc_frequency; - - std::jthread time_sync_thread; +private: + u64 start_ticks; + u64 rdtsc_frequency; + + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; + u64 cntpct_rdtsc_factor; + u64 gputick_rdtsc_factor; }; -} // namespace X64 - -u64 EstimateRDTSCFrequency(); -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <thread> + +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/x64/rdtsc.h" + +namespace Common::X64 { + +template <u64 Nearest> +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +u64 EstimateRDTSCFrequency() { + // Discard the first result measuring the rdtsc. + FencedRDTSC(); + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + FencedRDTSC(); + + // Get the current time. + const auto start_time = RealTimeClock::Now(); + const u64 tsc_start = FencedRDTSC(); + // Wait for 100 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + const auto end_time = RealTimeClock::Now(); + const u64 tsc_end = FencedRDTSC(); + // Calculate differences. 
+ const u64 timer_diff = static_cast<u64>( + std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); + const u64 tsc_diff = tsc_end - tsc_start; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#include "common/common_types.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static inline u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common::X64 diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 227c431bc..3655b8478 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -14,7 +14,6 @@ add_library(core STATIC core.h core_timing.cpp core_timing.h - core_timing_util.h cpu_manager.cpp cpu_manager.h crypto/aes_util.cpp diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 4f2692b05..4f0a3f8ea 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -16,12 +16,11 @@ #include "common/microprofile.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" namespace Core::Timing { -constexpr s64 MAX_SLICE_LENGTH = 4000; +constexpr s64 MAX_SLICE_LENGTH = 10000; std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) { 
return std::make_shared<EventType>(std::move(callback), std::move(name)); @@ -45,9 +44,7 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() - : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, - event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} +CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {} CoreTiming::~CoreTiming() { Reset(); @@ -68,7 +65,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) { on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; shutting_down = false; - ticks = 0; + cpu_ticks = 0; const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds) -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; ev_lost = CreateEvent("_lost_event", empty_timed_callback); @@ -173,38 +170,30 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, } void CoreTiming::AddTicks(u64 ticks_to_add) { - ticks += ticks_to_add; - downcount -= static_cast<s64>(ticks); + cpu_ticks += ticks_to_add; + downcount -= static_cast<s64>(cpu_ticks); } void CoreTiming::Idle() { - if (!event_queue.empty()) { - const u64 next_event_time = event_queue.front().time; - const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; - if (next_ticks > ticks) { - ticks = next_ticks; - } - return; - } - ticks += 1000U; + cpu_ticks += 1000U; } void CoreTiming::ResetTicks() { downcount = MAX_SLICE_LENGTH; } -u64 CoreTiming::GetCPUTicks() const { +u64 CoreTiming::GetClockTicks() const { if (is_multicore) [[likely]] { - return cpu_clock->GetCPUCycles(); + return clock->GetCNTPCT(); } - return ticks; + return Common::WallClock::CPUTickToCNTPCT(cpu_ticks); } -u64 CoreTiming::GetClockTicks() const { +u64 CoreTiming::GetGPUTicks() const { if (is_multicore) [[likely]] { - return cpu_clock->GetClockCycles(); + return clock->GetGPUTick(); } - return CpuCyclesToClockCycles(ticks); + 
return Common::WallClock::CPUTickToGPUTick(cpu_ticks); } std::optional<s64> CoreTiming::Advance() { @@ -297,9 +286,7 @@ void CoreTiming::ThreadLoop() { } paused_set = true; - event_clock->Pause(true); pause_event.Wait(); - event_clock->Pause(false); } } @@ -315,25 +302,18 @@ void CoreTiming::Reset() { has_started = false; } -std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { - if (is_multicore) [[likely]] { - return cpu_clock->GetTimeNS(); - } - return CyclesToNs(ticks); -} - std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { if (is_multicore) [[likely]] { - return event_clock->GetTimeNS(); + return clock->GetTimeNS(); } - return CyclesToNs(ticks); + return std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)}; } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { if (is_multicore) [[likely]] { - return event_clock->GetTimeUS(); + return clock->GetTimeUS(); } - return CyclesToUs(ticks); + return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)}; } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index e7c4a949f..10db1de55 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -116,14 +116,11 @@ public: return downcount; } - /// Returns current time in emulated CPU cycles - u64 GetCPUTicks() const; - - /// Returns current time in emulated in Clock cycles + /// Returns the current CNTPCT tick value. u64 GetClockTicks() const; - /// Returns current time in nanoseconds. - std::chrono::nanoseconds GetCPUTimeNs() const; + /// Returns the current GPU tick value. + u64 GetGPUTicks() const; /// Returns current time in microseconds. 
std::chrono::microseconds GetGlobalTimeUs() const; @@ -142,8 +139,7 @@ private: void Reset(); - std::unique_ptr<Common::WallClock> cpu_clock; - std::unique_ptr<Common::WallClock> event_clock; + std::unique_ptr<Common::WallClock> clock; s64 global_timer = 0; @@ -171,7 +167,7 @@ private: s64 pause_end_time{}; /// Cycle timing - u64 ticks{}; + u64 cpu_ticks{}; s64 downcount{}; }; diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h deleted file mode 100644 index fe5aaefc7..000000000 --- a/src/core/core_timing_util.h +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include <chrono> - -#include "common/common_types.h" -#include "core/hardware_properties.h" - -namespace Core::Timing { - -namespace detail { -constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000; -constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000; -} // namespace detail - -[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) { - return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED; -} - -[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) { - return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000; -} - -[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) { - return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000; -} - -[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) { - return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED; -} - -[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) { - return us.count() * detail::CNTFREQ_ADJUSTED / 1000; -} - -[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) { - return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000; -} - -[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) { - return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED; -} - 
-[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -} // namespace Core::Timing diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index 4e61d4335..d3286b352 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -153,7 +153,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const { const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); std::vector<VirtualDir> patch_dirs = {sdmc_load_dir}; - if (load_dir != nullptr && load_dir->GetSize() > 0) { + if (load_dir != nullptr) { const auto load_patch_dirs = load_dir->GetSubdirectories(); patch_dirs.insert(patch_dirs.end(), load_patch_dirs.begin(), load_patch_dirs.end()); } @@ -354,8 +354,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t const auto load_dir = fs_controller.GetModificationLoadRoot(title_id); const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || - ((load_dir == nullptr || load_dir->GetSize() <= 0) && - (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) { + (load_dir == nullptr && sdmc_load_dir == nullptr)) { return; } @@ -496,7 +495,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u // General Mods (LayeredFS and IPS) const auto mod_dir = fs_controller.GetModificationLoadRoot(title_id); - if (mod_dir != nullptr && mod_dir->GetSize() > 0) { + if (mod_dir != nullptr) { for (const 
auto& mod : mod_dir->GetSubdirectories()) { std::string types; @@ -540,7 +539,7 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u // SDMC mod directory (RomFS LayeredFS) const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); - if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0) { + if (sdmc_mod_dir != nullptr) { std::string types; if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "exefs"))) { AppendCommaIfNotEmpty(types, "LayeredExeFS"); diff --git a/src/core/file_sys/system_archive/time_zone_binary.cpp b/src/core/file_sys/system_archive/time_zone_binary.cpp index ceb0b41c6..7c17bbefa 100644 --- a/src/core/file_sys/system_archive/time_zone_binary.cpp +++ b/src/core/file_sys/system_archive/time_zone_binary.cpp @@ -15,7 +15,7 @@ namespace FileSys::SystemArchive { const static std::map<std::string, const std::map<const char*, const std::vector<u8>>&> tzdb_zoneinfo_dirs = {{"Africa", NxTzdb::africa}, {"America", NxTzdb::america}, - {"Antartica", NxTzdb::antartica}, + {"Antarctica", NxTzdb::antarctica}, {"Arctic", NxTzdb::arctic}, {"Asia", NxTzdb::asia}, {"Atlantic", NxTzdb::atlantic}, diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index 853b893a1..311a59e5f 100644 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -150,23 +150,29 @@ std::size_t ConcatenatedVfsFile::Read(u8* data, std::size_t length, std::size_t while (cur_length > 0 && it != concatenation_map.end()) { // Check if we can read the file at this position. const auto& file = it->file; - const u64 file_offset = it->offset; + const u64 map_offset = it->offset; const u64 file_size = file->GetSize(); - if (cur_offset >= file_offset + file_size) { + if (cur_offset > map_offset + file_size) { // Entirely out of bounds read. break; } // Read the file at this position. 
- const u64 intended_read_size = std::min<u64>(cur_length, file_size); + const u64 file_seek = cur_offset - map_offset; + const u64 intended_read_size = std::min<u64>(cur_length, file_size - file_seek); const u64 actual_read_size = - file->Read(data + (cur_offset - offset), intended_read_size, cur_offset - file_offset); + file->Read(data + (cur_offset - offset), intended_read_size, file_seek); // Update tracking. cur_offset += actual_read_size; cur_length -= actual_read_size; it++; + + // If we encountered a short read, we're done. + if (actual_read_size < intended_read_size) { + break; + } } return cur_offset - offset; diff --git a/src/core/file_sys/vfs_real.cpp b/src/core/file_sys/vfs_real.cpp index 7a15d8438..b0515ec05 100644 --- a/src/core/file_sys/vfs_real.cpp +++ b/src/core/file_sys/vfs_real.cpp @@ -10,6 +10,7 @@ #include "common/fs/fs.h" #include "common/fs/path_util.h" #include "common/logging/log.h" +#include "core/file_sys/vfs.h" #include "core/file_sys/vfs_real.h" // For FileTimeStampRaw @@ -72,8 +73,10 @@ VfsEntryType RealVfsFilesystem::GetEntryType(std::string_view path_) const { return VfsEntryType::File; } -VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) { +VirtualFile RealVfsFilesystem::OpenFileFromEntry(std::string_view path_, std::optional<u64> size, + Mode perms) { const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); + std::scoped_lock lk{list_lock}; if (auto it = cache.find(path); it != cache.end()) { if (auto file = it->second.lock(); file) { @@ -81,23 +84,30 @@ VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) { } } - if (!FS::Exists(path) || !FS::IsFile(path)) { + if (!size && !FS::IsFile(path)) { return nullptr; } auto reference = std::make_unique<FileReference>(); - this->InsertReferenceIntoList(*reference); + this->InsertReferenceIntoListLocked(*reference); - auto file = - std::shared_ptr<RealVfsFile>(new RealVfsFile(*this, std::move(reference), path, 
perms)); + auto file = std::shared_ptr<RealVfsFile>( + new RealVfsFile(*this, std::move(reference), path, perms, size)); cache[path] = file; return file; } +VirtualFile RealVfsFilesystem::OpenFile(std::string_view path_, Mode perms) { + return OpenFileFromEntry(path_, {}, perms); +} + VirtualFile RealVfsFilesystem::CreateFile(std::string_view path_, Mode perms) { const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); - cache.erase(path); + { + std::scoped_lock lk{list_lock}; + cache.erase(path); + } // Current usages of CreateFile expect to delete the contents of an existing file. if (FS::IsFile(path)) { @@ -127,8 +137,11 @@ VirtualFile RealVfsFilesystem::CopyFile(std::string_view old_path_, std::string_ VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_view new_path_) { const auto old_path = FS::SanitizePath(old_path_, FS::DirectorySeparator::PlatformDefault); const auto new_path = FS::SanitizePath(new_path_, FS::DirectorySeparator::PlatformDefault); - cache.erase(old_path); - cache.erase(new_path); + { + std::scoped_lock lk{list_lock}; + cache.erase(old_path); + cache.erase(new_path); + } if (!FS::RenameFile(old_path, new_path)) { return nullptr; } @@ -137,7 +150,10 @@ VirtualFile RealVfsFilesystem::MoveFile(std::string_view old_path_, std::string_ bool RealVfsFilesystem::DeleteFile(std::string_view path_) { const auto path = FS::SanitizePath(path_, FS::DirectorySeparator::PlatformDefault); - cache.erase(path); + { + std::scoped_lock lk{list_lock}; + cache.erase(path); + } return FS::RemoveFile(path); } @@ -176,14 +192,17 @@ bool RealVfsFilesystem::DeleteDirectory(std::string_view path_) { return FS::RemoveDirRecursively(path); } -void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms, - FileReference& reference) { +std::unique_lock<std::mutex> RealVfsFilesystem::RefreshReference(const std::string& path, + Mode perms, + FileReference& reference) { + std::unique_lock lk{list_lock}; + 
// Temporarily remove from list. - this->RemoveReferenceFromList(reference); + this->RemoveReferenceFromListLocked(reference); // Restore file if needed. if (!reference.file) { - this->EvictSingleReference(); + this->EvictSingleReferenceLocked(); reference.file = FS::FileOpen(path, ModeFlagsToFileAccessMode(perms), FS::FileType::BinaryFile); @@ -193,12 +212,16 @@ void RealVfsFilesystem::RefreshReference(const std::string& path, Mode perms, } // Reinsert into list. - this->InsertReferenceIntoList(reference); + this->InsertReferenceIntoListLocked(reference); + + return lk; } void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference) { + std::scoped_lock lk{list_lock}; + // Remove from list. - this->RemoveReferenceFromList(*reference); + this->RemoveReferenceFromListLocked(*reference); // Close the file. if (reference->file) { @@ -207,14 +230,14 @@ void RealVfsFilesystem::DropReference(std::unique_ptr<FileReference>&& reference } } -void RealVfsFilesystem::EvictSingleReference() { +void RealVfsFilesystem::EvictSingleReferenceLocked() { if (num_open_files < MaxOpenFiles || open_references.empty()) { return; } // Get and remove from list. auto& reference = open_references.back(); - this->RemoveReferenceFromList(reference); + this->RemoveReferenceFromListLocked(reference); // Close the file. if (reference.file) { @@ -223,10 +246,10 @@ void RealVfsFilesystem::EvictSingleReference() { } // Reinsert into closed list. 
- this->InsertReferenceIntoList(reference); + this->InsertReferenceIntoListLocked(reference); } -void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) { +void RealVfsFilesystem::InsertReferenceIntoListLocked(FileReference& reference) { if (reference.file) { open_references.push_front(reference); } else { @@ -234,7 +257,7 @@ void RealVfsFilesystem::InsertReferenceIntoList(FileReference& reference) { } } -void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) { +void RealVfsFilesystem::RemoveReferenceFromListLocked(FileReference& reference) { if (reference.file) { open_references.erase(open_references.iterator_to(reference)); } else { @@ -243,10 +266,10 @@ void RealVfsFilesystem::RemoveReferenceFromList(FileReference& reference) { } RealVfsFile::RealVfsFile(RealVfsFilesystem& base_, std::unique_ptr<FileReference> reference_, - const std::string& path_, Mode perms_) + const std::string& path_, Mode perms_, std::optional<u64> size_) : base(base_), reference(std::move(reference_)), path(path_), parent_path(FS::GetParentPath(path_)), path_components(FS::SplitPathComponents(path_)), - perms(perms_) {} + size(size_), perms(perms_) {} RealVfsFile::~RealVfsFile() { base.DropReference(std::move(reference)); @@ -257,12 +280,15 @@ std::string RealVfsFile::GetName() const { } std::size_t RealVfsFile::GetSize() const { - base.RefreshReference(path, perms, *reference); - return reference->file ? reference->file->GetSize() : 0; + if (size) { + return *size; + } + return FS::GetSize(path); } bool RealVfsFile::Resize(std::size_t new_size) { - base.RefreshReference(path, perms, *reference); + size.reset(); + auto lk = base.RefreshReference(path, perms, *reference); return reference->file ? 
reference->file->SetSize(new_size) : false; } @@ -279,7 +305,7 @@ bool RealVfsFile::IsReadable() const { } std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) const { - base.RefreshReference(path, perms, *reference); + auto lk = base.RefreshReference(path, perms, *reference); if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) { return 0; } @@ -287,7 +313,8 @@ std::size_t RealVfsFile::Read(u8* data, std::size_t length, std::size_t offset) } std::size_t RealVfsFile::Write(const u8* data, std::size_t length, std::size_t offset) { - base.RefreshReference(path, perms, *reference); + size.reset(); + auto lk = base.RefreshReference(path, perms, *reference); if (!reference->file || !reference->file->Seek(static_cast<s64>(offset))) { return 0; } @@ -309,10 +336,11 @@ std::vector<VirtualFile> RealVfsDirectory::IterateEntries<RealVfsFile, VfsFile>( std::vector<VirtualFile> out; - const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) { - const auto full_path_string = FS::PathToUTF8String(full_path); + const FS::DirEntryCallable callback = [this, + &out](const std::filesystem::directory_entry& entry) { + const auto full_path_string = FS::PathToUTF8String(entry.path()); - out.emplace_back(base.OpenFile(full_path_string, perms)); + out.emplace_back(base.OpenFileFromEntry(full_path_string, entry.file_size(), perms)); return true; }; @@ -330,8 +358,9 @@ std::vector<VirtualDir> RealVfsDirectory::IterateEntries<RealVfsDirectory, VfsDi std::vector<VirtualDir> out; - const FS::DirEntryCallable callback = [this, &out](const std::filesystem::path& full_path) { - const auto full_path_string = FS::PathToUTF8String(full_path); + const FS::DirEntryCallable callback = [this, + &out](const std::filesystem::directory_entry& entry) { + const auto full_path_string = FS::PathToUTF8String(entry.path()); out.emplace_back(base.OpenDirectory(full_path_string, perms)); @@ -483,12 +512,10 @@ std::map<std::string, 
VfsEntryType, std::less<>> RealVfsDirectory::GetEntries() std::map<std::string, VfsEntryType, std::less<>> out; - const FS::DirEntryCallable callback = [&out](const std::filesystem::path& full_path) { - const auto filename = FS::PathToUTF8String(full_path.filename()); - + const FS::DirEntryCallable callback = [&out](const std::filesystem::directory_entry& entry) { + const auto filename = FS::PathToUTF8String(entry.path().filename()); out.insert_or_assign(filename, - FS::IsDir(full_path) ? VfsEntryType::Directory : VfsEntryType::File); - + entry.is_directory() ? VfsEntryType::Directory : VfsEntryType::File); return true; }; diff --git a/src/core/file_sys/vfs_real.h b/src/core/file_sys/vfs_real.h index d8c900e33..26ea7df62 100644 --- a/src/core/file_sys/vfs_real.h +++ b/src/core/file_sys/vfs_real.h @@ -4,6 +4,8 @@ #pragma once #include <map> +#include <mutex> +#include <optional> #include <string_view> #include "common/intrusive_list.h" #include "core/file_sys/mode.h" @@ -20,6 +22,8 @@ struct FileReference : public Common::IntrusiveListBaseNode<FileReference> { }; class RealVfsFile; +class RealVfsDirectory; + class RealVfsFilesystem : public VfsFilesystem { public: RealVfsFilesystem(); @@ -45,17 +49,24 @@ private: std::map<std::string, std::weak_ptr<VfsFile>, std::less<>> cache; ReferenceListType open_references; ReferenceListType closed_references; + std::mutex list_lock; size_t num_open_files{}; private: friend class RealVfsFile; - void RefreshReference(const std::string& path, Mode perms, FileReference& reference); + std::unique_lock<std::mutex> RefreshReference(const std::string& path, Mode perms, + FileReference& reference); void DropReference(std::unique_ptr<FileReference>&& reference); - void EvictSingleReference(); private: - void InsertReferenceIntoList(FileReference& reference); - void RemoveReferenceFromList(FileReference& reference); + friend class RealVfsDirectory; + VirtualFile OpenFileFromEntry(std::string_view path, std::optional<u64> size, + Mode 
perms = Mode::Read); + +private: + void EvictSingleReferenceLocked(); + void InsertReferenceIntoListLocked(FileReference& reference); + void RemoveReferenceFromListLocked(FileReference& reference); }; // An implementation of VfsFile that represents a file on the user's computer. @@ -78,13 +89,14 @@ public: private: RealVfsFile(RealVfsFilesystem& base, std::unique_ptr<FileReference> reference, - const std::string& path, Mode perms = Mode::Read); + const std::string& path, Mode perms = Mode::Read, std::optional<u64> size = {}); RealVfsFilesystem& base; std::unique_ptr<FileReference> reference; std::string path; std::string parent_path; std::vector<std::string> path_components; + std::optional<u64> size; Mode perms; }; diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index faa12b4f0..75ce5a23c 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -184,7 +184,8 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) { prev_highest_thread != highest_thread) [[likely]] { if (prev_highest_thread != nullptr) [[likely]] { IncrementScheduledCount(prev_highest_thread); - prev_highest_thread->SetLastScheduledTick(m_kernel.System().CoreTiming().GetCPUTicks()); + prev_highest_thread->SetLastScheduledTick( + m_kernel.System().CoreTiming().GetClockTicks()); } if (m_state.should_count_idle) { if (highest_thread != nullptr) [[likely]] { @@ -351,7 +352,7 @@ void KScheduler::SwitchThread(KThread* next_thread) { // Update the CPU time tracking variables. 
const s64 prev_tick = m_last_context_switch_time; - const s64 cur_tick = m_kernel.System().CoreTiming().GetCPUTicks(); + const s64 cur_tick = m_kernel.System().CoreTiming().GetClockTicks(); const s64 tick_diff = cur_tick - prev_tick; cur_thread->AddCpuTime(m_core_id, tick_diff); if (cur_process != nullptr) { diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp index b7da3eee7..3e5b735b1 100644 --- a/src/core/hle/kernel/k_synchronization_object.cpp +++ b/src/core/hle/kernel/k_synchronization_object.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "core/hle/kernel/k_scheduler.h" #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" #include "core/hle/kernel/k_synchronization_object.h" @@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index, KSynchronizationObject** objects, const s32 num_objects, s64 timeout) { // Allocate space on stack for thread nodes. - std::vector<ThreadListNode> thread_nodes(num_objects); + std::array<ThreadListNode, Svc::ArgumentHandleCountMax> thread_nodes; // Prepare for wait. KThread* thread = GetCurrentThreadPointer(kernel); diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 70480b725..adb6ec581 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -4,6 +4,8 @@ #include <algorithm> #include <atomic> #include <cinttypes> +#include <condition_variable> +#include <mutex> #include <optional> #include <vector> @@ -907,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) { R_SUCCEED(); } -Result KThread::GetThreadContext3(std::vector<u8>& out) { +Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) { // Lock ourselves. 
KScopedLightLock lk{m_activity_pause_lock}; @@ -925,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector<u8>& out) { // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. auto context = GetContext64(); context.pstate &= 0xFF0FFE20; - - out.resize(sizeof(context)); + out.resize_destructive(sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context)); } else { // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. auto context = GetContext32(); context.cpsr &= 0xFF0FFE20; - - out.resize(sizeof(context)); + out.resize_destructive(sizeof(context)); std::memcpy(out.data(), std::addressof(context), sizeof(context)); } } @@ -1313,7 +1313,8 @@ void KThread::RequestDummyThreadWait() { ASSERT(this->IsDummyThread()); // We will block when the scheduler lock is released. - m_dummy_thread_runnable.store(false); + std::scoped_lock lock{m_dummy_thread_mutex}; + m_dummy_thread_runnable = false; } void KThread::DummyThreadBeginWait() { @@ -1323,7 +1324,8 @@ void KThread::DummyThreadBeginWait() { } // Block until runnable is no longer false. - m_dummy_thread_runnable.wait(false); + std::unique_lock lock{m_dummy_thread_mutex}; + m_dummy_thread_cv.wait(lock, [this] { return m_dummy_thread_runnable; }); } void KThread::DummyThreadEndWait() { @@ -1331,8 +1333,11 @@ void KThread::DummyThreadEndWait() { ASSERT(this->IsDummyThread()); // Wake up the waiting thread. 
- m_dummy_thread_runnable.store(true); - m_dummy_thread_runnable.notify_one(); + { + std::scoped_lock lock{m_dummy_thread_mutex}; + m_dummy_thread_runnable = true; + } + m_dummy_thread_cv.notify_one(); } void KThread::BeginWait(KThreadQueue* queue) { diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index f9814ac8f..dd662b3f8 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -15,6 +15,7 @@ #include "common/intrusive_list.h" #include "common/intrusive_red_black_tree.h" +#include "common/scratch_buffer.h" #include "common/spin_lock.h" #include "core/arm/arm_interface.h" #include "core/hle/kernel/k_affinity_mask.h" @@ -567,7 +568,7 @@ public: void RemoveWaiter(KThread* thread); - Result GetThreadContext3(std::vector<u8>& out); + Result GetThreadContext3(Common::ScratchBuffer<u8>& out); KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) { return this->RemoveWaiterByKey(out_has_waiters, key, false); @@ -892,7 +893,9 @@ private: std::shared_ptr<Common::Fiber> m_host_context{}; ThreadType m_thread_type{}; StepState m_step_state{}; - std::atomic<bool> m_dummy_thread_runnable{true}; + bool m_dummy_thread_runnable{true}; + std::mutex m_dummy_thread_mutex{}; + std::condition_variable m_dummy_thread_cv{}; // For debugging std::vector<KSynchronizationObject*> m_wait_objects_for_debugging{}; diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp index 2b2c878b5..445cdd87b 100644 --- a/src/core/hle/kernel/svc/svc_info.cpp +++ b/src/core/hle/kernel/svc/svc_info.cpp @@ -199,9 +199,9 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { const u64 thread_ticks = current_thread->GetCpuTime(); - out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); + out_ticks = thread_ticks + (core_timing.GetClockTicks() - prev_ctx_ticks); } else if (same_thread && info_sub_id == 
system.Kernel().CurrentPhysicalCoreIndex()) { - out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; + out_ticks = core_timing.GetClockTicks() - prev_ctx_ticks; } *result = out_ticks; diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index ea03068aa..60247df2e 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/scope_exit.h" +#include "common/scratch_buffer.h" #include "core/core.h" #include "core/hle/kernel/k_client_session.h" #include "core/hle/kernel/k_process.h" @@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), ResultInvalidPointer); - std::vector<Handle> handles(num_handles); + std::array<Handle, Svc::ArgumentHandleCountMax> handles; GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); // Convert handle list to object table. - std::vector<KSynchronizationObject*> objs(num_handles); + std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs; R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(), num_handles), ResultInvalidHandle); @@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad // Wait for an object. 
s32 index; Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), - static_cast<s32>(objs.size()), timeout_ns); + num_handles, timeout_ns); if (result == ResultTimedOut) { R_RETURN(result); } diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp index 04d65f0bd..53df5bcd8 100644 --- a/src/core/hle/kernel/svc/svc_synchronization.cpp +++ b/src/core/hle/kernel/svc/svc_synchronization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/scope_exit.h" +#include "common/scratch_buffer.h" #include "core/core.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" @@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons // Get the synchronization context. auto& kernel = system.Kernel(); auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); - std::vector<KSynchronizationObject*> objs(num_handles); + std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs; // Copy user handles. if (num_handles > 0) { @@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons }); // Wait on the objects. - Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), - static_cast<s32>(objs.size()), timeout_ns); + Result res = + KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns); R_SUCCEED_IF(res == ResultSessionClosed); R_RETURN(res); @@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha // Ensure number of handles is valid. 
R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); - - std::vector<Handle> handles(num_handles); + std::array<Handle, Svc::ArgumentHandleCountMax> handles; if (num_handles > 0) { GetCurrentMemory(system.Kernel()) .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp index 37b54079c..36b94e6bf 100644 --- a/src/core/hle/kernel/svc/svc_thread.cpp +++ b/src/core/hle/kernel/svc/svc_thread.cpp @@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha } // Get the thread context. - std::vector<u8> context; + static thread_local Common::ScratchBuffer<u8> context; R_TRY(thread->GetThreadContext3(context)); // Copy the thread context to user space. diff --git a/src/core/hle/kernel/svc/svc_tick.cpp b/src/core/hle/kernel/svc/svc_tick.cpp index 561336482..7dd7c6e51 100644 --- a/src/core/hle/kernel/svc/svc_tick.cpp +++ b/src/core/hle/kernel/svc/svc_tick.cpp @@ -12,16 +12,8 @@ namespace Kernel::Svc { int64_t GetSystemTick(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); - auto& core_timing = system.CoreTiming(); - // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) - const u64 result{core_timing.GetClockTicks()}; - - if (!system.Kernel().IsMulticore()) { - core_timing.AddTicks(400U); - } - - return static_cast<int64_t>(result); + return static_cast<int64_t>(system.CoreTiming().GetClockTicks()); } int64_t GetSystemTick64(Core::System& system) { diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index f0640c64f..c8d574993 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -5,6 +5,7 @@ #include "audio_core/renderer/audio_device.h" #include "common/common_funcs.h" #include "common/logging/log.h" +#include "common/settings.h" #include "common/string_util.h" 
#include "core/core.h" #include "core/hle/kernel/k_event.h" @@ -123,19 +124,13 @@ private: void GetReleasedAudioInBuffer(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); - std::vector<u64> released_buffers(write_buffer_size); + tmp_buffer.resize_destructive(write_buffer_size); + tmp_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(released_buffers); + const auto count = impl->GetReleasedBuffers(tmp_buffer); - [[maybe_unused]] std::string tags{}; - for (u32 i = 0; i < count; i++) { - tags += fmt::format("{:08X}, ", released_buffers[i]); - } - [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; - LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count, - tags); + ctx.WriteBuffer(tmp_buffer); - ctx.WriteBuffer(released_buffers); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); rb.Push(count); @@ -200,6 +195,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr<AudioCore::AudioIn::In> impl; + Common::ScratchBuffer<u64> tmp_buffer; }; AudInU::AudInU(Core::System& system_) diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 3e62fa4fc..032c8c11f 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -123,19 +123,13 @@ private: void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); - std::vector<u64> released_buffers(write_buffer_size); + tmp_buffer.resize_destructive(write_buffer_size); + tmp_buffer[0] = 0; - const auto count = impl->GetReleasedBuffers(released_buffers); + const auto count = impl->GetReleasedBuffers(tmp_buffer); - [[maybe_unused]] std::string tags{}; - for (u32 i = 0; i < count; i++) { - tags += fmt::format("{:08X}, ", released_buffers[i]); - } - [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()}; - 
LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count, - tags); + ctx.WriteBuffer(tmp_buffer); - ctx.WriteBuffer(released_buffers); IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); rb.Push(count); @@ -211,6 +205,7 @@ private: KernelHelpers::ServiceContext service_context; Kernel::KEvent* event; std::shared_ptr<AudioCore::AudioOut::Out> impl; + Common::ScratchBuffer<u64> tmp_buffer; }; AudOutU::AudOutU(Core::System& system_) diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 7086d4750..12845c23a 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -116,28 +116,26 @@ private: // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for // checking size 0. Performance size is 0 for most games. - std::vector<u8> output{}; - std::vector<u8> performance{}; auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; if (is_buffer_b) { const auto buffersB{ctx.BufferDescriptorB()}; - output.resize(buffersB[0].Size(), 0); - performance.resize(buffersB[1].Size(), 0); + tmp_output.resize_destructive(buffersB[0].Size()); + tmp_performance.resize_destructive(buffersB[1].Size()); } else { const auto buffersC{ctx.BufferDescriptorC()}; - output.resize(buffersC[0].Size(), 0); - performance.resize(buffersC[1].Size(), 0); + tmp_output.resize_destructive(buffersC[0].Size()); + tmp_performance.resize_destructive(buffersC[1].Size()); } - auto result = impl->RequestUpdate(input, performance, output); + auto result = impl->RequestUpdate(input, tmp_performance, tmp_output); if (result.IsSuccess()) { if (is_buffer_b) { - ctx.WriteBufferB(output.data(), output.size(), 0); - ctx.WriteBufferB(performance.data(), performance.size(), 1); + ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0); + ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1); } else { - ctx.WriteBufferC(output.data(), output.size(), 0); 
- ctx.WriteBufferC(performance.data(), performance.size(), 1); + ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0); + ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1); } } else { LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); @@ -235,6 +233,8 @@ private: Kernel::KEvent* rendered_event; Manager& manager; std::unique_ptr<Renderer> impl; + Common::ScratchBuffer<u8> tmp_output; + Common::ScratchBuffer<u8> tmp_performance; }; class IAudioDevice final : public ServiceFramework<IAudioDevice> { diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 24ce37e87..d8e9c8719 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -4,6 +4,7 @@ #pragma once #include "audio_core/audio_render_manager.h" +#include "common/scratch_buffer.h" #include "core/hle/service/kernel_helpers.h" #include "core/hle/service/service.h" diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 451ac224a..c835f6cb7 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -68,13 +68,13 @@ private: ExtraBehavior extra_behavior) { u32 consumed = 0; u32 sample_count = 0; - std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>()); + tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>()); if (extra_behavior == ExtraBehavior::ResetContext) { ResetDecoderContext(); } - if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { + if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) { LOG_ERROR(Audio, "Failed to decode opus data"); IPC::ResponseBuilder rb{ctx, 2}; // TODO(ogniK): Use correct error code @@ -90,11 +90,11 @@ private: if (performance) { rb.Push<u64>(*performance); } - ctx.WriteBuffer(samples); + ctx.WriteBuffer(tmp_samples); } bool DecodeOpusData(u32& consumed, u32& 
sample_count, std::span<const u8> input, - std::vector<opus_int16>& output, u64* out_performance_time) const { + std::span<opus_int16> output, u64* out_performance_time) const { const auto start_time = std::chrono::steady_clock::now(); const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); if (sizeof(OpusPacketHeader) > input.size()) { @@ -154,6 +154,7 @@ private: OpusDecoderPtr decoder; u32 sample_rate; u32 channel_count; + Common::ScratchBuffer<opus_int16> tmp_samples; }; class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { diff --git a/src/core/hle/service/hid/hidbus.cpp b/src/core/hle/service/hid/hidbus.cpp index 5604a6fda..80aac221b 100644 --- a/src/core/hle/service/hid/hidbus.cpp +++ b/src/core/hle/service/hid/hidbus.cpp @@ -5,7 +5,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hid/hid_types.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index ab1f30f9e..a04538d5d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -34,7 +34,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) = 0; + std::span<u8> output) = 0; /** * Handles an ioctl2 request. @@ -45,7 +45,7 @@ public: * @returns The result code of the ioctl. */ virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) = 0; + std::span<const u8> inline_input, std::span<u8> output) = 0; /** * Handles an ioctl3 request. @@ -56,7 +56,7 @@ public: * @returns The result code of the ioctl. 
*/ virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) = 0; + std::span<u8> output, std::span<u8> inline_output) = 0; /** * Called once a device is opened diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 5a5b2e305..05a43d8dc 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core) nvdisp_disp0::~nvdisp_disp0() = default; NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -51,8 +51,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form stride, format, transform, crop_rect}; system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); - system.GetPerfStats().EndSystemFrame(); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().BeginSystemFrame(); } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h 
b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index bcd0e3ed5..daee05fe8 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -26,11 +26,11 @@ public: ~nvdisp_disp0() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 681bd0867..07e570a9f 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con nvhost_as_gpu::~nvhost_as_gpu() = default; NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 'A': switch (command.cmd) { @@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i } NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult 
nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { switch (command.group) { case 'A': switch (command.cmd) { @@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i void nvhost_as_gpu::OnOpen(DeviceFD fd) {} void nvhost_as_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::span<u8> output) { IoctlAllocAsEx params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& ou return NvResult::Success; } -NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::span<u8> output) { IoctlAllocSpace params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { mapping_map.erase(offset); } -NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::span<u8> output) { IoctlFreeSpace params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& ou return NvResult::Success; } -NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::span<u8> output) { const auto num_entries = input.size() / sizeof(IoctlRemapEntry); LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); - std::vector<IoctlRemapEntry> entries(num_entries); - std::memcpy(entries.data(), input.data(), input.size()); - std::scoped_lock lock(mutex); + 
entries.resize_destructive(num_entries); + std::memcpy(entries.data(), input.data(), input.size()); if (!vm.initialised) { return NvResult::BadValue; @@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output return NvResult::Success; } -NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::span<u8> output) { IoctlMapBufferEx params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& return NvResult::Success; } -NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::span<u8> output) { IoctlUnmapBuffer params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& return NvResult::Success; } -NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::span<u8> output) { IoctlBindChannel params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); @@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) { }; } -NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output) { IoctlGetVaRegions params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& return NvResult::Success; } -NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) { +NvResult 
nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) { IoctlGetVaRegions params{}; std::memcpy(¶ms, input.data(), input.size()); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index 1aba8d579..2af3e1260 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -15,6 +15,7 @@ #include "common/address_space.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/scratch_buffer.h" #include "common/swap.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" @@ -48,11 +49,11 @@ public: ~nvhost_as_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -138,18 +139,18 @@ private: static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, "IoctlGetVaRegions is incorrect size"); - NvResult AllocAsEx(std::span<const u8> input, std::vector<u8>& output); - NvResult AllocateSpace(std::span<const u8> input, std::vector<u8>& output); - NvResult Remap(std::span<const u8> input, std::vector<u8>& output); - NvResult MapBufferEx(std::span<const u8> input, std::vector<u8>& output); - NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); 
- NvResult FreeSpace(std::span<const u8> input, std::vector<u8>& output); - NvResult BindChannel(std::span<const u8> input, std::vector<u8>& output); + NvResult AllocAsEx(std::span<const u8> input, std::span<u8> output); + NvResult AllocateSpace(std::span<const u8> input, std::span<u8> output); + NvResult Remap(std::span<const u8> input, std::span<u8> output); + NvResult MapBufferEx(std::span<const u8> input, std::span<u8> output); + NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output); + NvResult FreeSpace(std::span<const u8> input, std::span<u8> output); + NvResult BindChannel(std::span<const u8> input, std::span<u8> output); void GetVARegionsImpl(IoctlGetVaRegions& params); - NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output); - NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output); + NvResult GetVARegions(std::span<const u8> input, std::span<u8> output); + NvResult GetVARegions(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output); void FreeMappingLocked(u64 offset); @@ -212,6 +213,7 @@ private: bool initialised{}; } vm; std::shared_ptr<Tegra::MemoryManager> gmmu; + Common::ScratchBuffer<IoctlRemapEntry> entries; // s32 channel{}; // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index e12025560..4d55554b4 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() { } NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp } NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, 
std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_outpu) { + std::span<u8> output, std::span<u8> inline_outpu) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {} void nvhost_ctrl::OnClose(DeviceFD fd) {} -NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output) { IocGetConfigParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(), @@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8 return NvResult::ConfigVarNotFound; // Returns error on production mode } -NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, +NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation) { IocCtrlEventWaitParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); @@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) { return NvResult::Success; } -NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output) { IocCtrlEventRegisterParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); const u32 event_id = params.user_event_id; @@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vecto return NvResult::Success; } -NvResult 
nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output) { IocCtrlEventUnregisterParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); const u32 event_id = params.user_event_id & 0x00FF; @@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vec return FreeEvent(event_id); } -NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, - std::vector<u8>& output) { +NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output) { IocCtrlEventUnregisterBatchParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); u64 event_mask = params.user_events; @@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, return NvResult::Success; } -NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output) { IocCtrlEventClearParams params{}; std::memcpy(¶ms, input.data(), sizeof(params)); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index dd2e7888a..2efed4862 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -26,11 +26,11 @@ public: ~nvhost_ctrl() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) 
override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -186,13 +186,12 @@ private: static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8, "IocCtrlEventKill is incorrect size"); - NvResult NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output); - NvResult IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, - bool is_allocation); - NvResult IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output); - NvResult IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output); - NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::vector<u8>& output); - NvResult IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output); + NvResult NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output); + NvResult IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation); + NvResult IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output); + NvResult IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output); + NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output); + NvResult IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output); NvResult FreeEvent(u32 slot); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index be3c083db..6081d92e9 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() { } NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 'G': switch (command.cmd) { @@ -54,13 +54,13 @@ NvResult 
nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> } NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { switch (command.group) { case 'G': switch (command.cmd) { @@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlCharacteristics params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) { +NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlCharacteristics params{}; std::memcpy(¶ms, input.data(), input.size()); @@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output) { IoctlGpuGetTpcMasksArgs params{}; 
std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); @@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8> return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) { +NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) { IoctlGpuGetTpcMasksArgs params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); @@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8> return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlActiveSlotMask params{}; @@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vect return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlZcullGetCtxSize params{}; @@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlNvgpuGpuZcullGetInfoArgs params{}; @@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8 return NvResult::Success; } -NvResult 
nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::span<u8> output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlZbcSetTable params{}; @@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8> return NvResult::Success; } -NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::span<u8> output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlZbcQueryTable params{}; @@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u return NvResult::Success; } -NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::span<u8> output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); IoctlFlushL2 params{}; @@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& ou return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlGetGpuTime params{}; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index b9333d9d3..97995551c 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -22,11 +22,11 @@ public: ~nvhost_ctrl_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) 
override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -151,21 +151,21 @@ private: }; static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); - NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output); - NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output); - - NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output); - NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output); - - NvResult GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output); - NvResult ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output); - NvResult ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output); - NvResult ZBCSetTable(std::span<const u8> input, std::vector<u8>& output); - NvResult ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output); - NvResult FlushL2(std::span<const u8> input, std::vector<u8>& output); - NvResult GetGpuTime(std::span<const u8> input, std::vector<u8>& output); + NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output); + NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output); + + NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output); + NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output); + + NvResult GetActiveSlotMask(std::span<const u8> input, std::span<u8> output); + NvResult ZCullGetCtxSize(std::span<const u8> input, std::span<u8> 
output); + NvResult ZCullGetInfo(std::span<const u8> input, std::span<u8> output); + NvResult ZBCSetTable(std::span<const u8> input, std::span<u8> output); + NvResult ZBCQueryTable(std::span<const u8> input, std::span<u8> output); + NvResult FlushL2(std::span<const u8> input, std::span<u8> output); + NvResult GetGpuTime(std::span<const u8> input, std::span<u8> output); EventInterface& events_interface; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 453a965dc..46a25fcab 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() { } NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu }; NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu } NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu void nvhost_gpu::OnOpen(DeviceFD fd) {} void nvhost_gpu::OnClose(DeviceFD fd) {} -NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> 
input, std::span<u8> output) { IoctlSetNvmapFD params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); @@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& outp return NvResult::Success; } -NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlClientData params{}; @@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& o return NvResult::Success; } -NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::span<u8> output) { LOG_DEBUG(Service_NVDRV, "called"); IoctlClientData params{}; @@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& o return NvResult::Success; } -NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::span<u8> output) { std::memcpy(&zcull_params, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va, zcull_params.mode); @@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& outpu return NvResult::Success; } -NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::span<u8> output) { IoctlSetErrorNotifier params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset, @@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8> return NvResult::Success; } -NvResult 
nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::span<u8> output) { std::memcpy(&channel_priority, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); return NvResult::Success; } -NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output) { IoctlAllocGpfifoEx2 params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_WARNING(Service_NVDRV, @@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& return NvResult::Success; } -NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::span<u8> output) { IoctlAllocObjCtx params{}; std::memcpy(¶ms, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num, @@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vecto return NvResult::Success; } -static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { +static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList( + NvFence fence) { return { Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::SubmissionMode::Increasing), @@ -219,35 +220,35 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { }; } -static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) { - std::vector<Tegra::CommandHeader> result{ +static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementCommandList( + NvFence fence) { + boost::container::small_vector<Tegra::CommandHeader, 512> result{ 
Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, Tegra::SubmissionMode::Increasing), {}}; for (u32 count = 0; count < 2; ++count) { - result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, - Tegra::SubmissionMode::Increasing)); - result.emplace_back( + result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, + Tegra::SubmissionMode::Increasing)); + result.push_back( BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); } return result; } -static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) { - std::vector<Tegra::CommandHeader> result{ +static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementWithWfiCommandList( + NvFence fence) { + boost::container::small_vector<Tegra::CommandHeader, 512> result{ Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, Tegra::SubmissionMode::Increasing), {}}; - const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)}; - - result.insert(result.end(), increment.begin(), increment.end()); - + auto increment_list{BuildIncrementCommandList(fence)}; + result.insert(result.end(), increment_list.begin(), increment_list.end()); return result; } -NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, +NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output, Tegra::CommandList&& entries) { LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, params.num_entries, params.flags.raw); @@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8> return NvResult::Success; } -NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, +NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output, bool kickoff) { if (input.size() < 
sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8> } NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, - std::vector<u8>& output) { + std::span<u8> output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); return NvResult::InvalidSize; @@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const return SubmitGPFIFOImpl(params, output, std::move(entries)); } -NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::span<u8> output) { IoctlGetWaitbase params{}; std::memcpy(¶ms, input.data(), sizeof(IoctlGetWaitbase)); LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); @@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& out return NvResult::Success; } -NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::span<u8> output) { IoctlChannelSetTimeout params{}; std::memcpy(¶ms, input.data(), sizeof(IoctlChannelSetTimeout)); LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); @@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8 return NvResult::Success; } -NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output) { IoctlSetTimeslice params{}; std::memcpy(¶ms, input.data(), sizeof(IoctlSetTimeslice)); LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 3ca58202d..529c20526 100644 --- 
a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -41,11 +41,11 @@ public: ~nvhost_gpu() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -186,23 +186,23 @@ private: u32_le channel_priority{}; u32_le channel_timeslice{}; - NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); - NvResult SetClientData(std::span<const u8> input, std::vector<u8>& output); - NvResult GetClientData(std::span<const u8> input, std::vector<u8>& output); - NvResult ZCullBind(std::span<const u8> input, std::vector<u8>& output); - NvResult SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output); - NvResult SetChannelPriority(std::span<const u8> input, std::vector<u8>& output); - NvResult AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output); - NvResult AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output); - NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, + NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output); + NvResult SetClientData(std::span<const u8> input, std::span<u8> output); + NvResult GetClientData(std::span<const u8> input, std::span<u8> output); + NvResult ZCullBind(std::span<const u8> input, std::span<u8> output); + NvResult SetErrorNotifier(std::span<const u8> 
input, std::span<u8> output); + NvResult SetChannelPriority(std::span<const u8> input, std::span<u8> output); + NvResult AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output); + NvResult AllocateObjectContext(std::span<const u8> input, std::span<u8> output); + NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output, Tegra::CommandList&& entries); - NvResult SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, + NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output, bool kickoff = false); NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, - std::vector<u8>& output); - NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); - NvResult ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output); - NvResult ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output); + std::span<u8> output); + NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output); + NvResult ChannelSetTimeout(std::span<const u8> input, std::span<u8> output); + NvResult ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output); EventInterface& events_interface; NvCore::Container& core; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index dc45169ad..a174442a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_) nvhost_nvdec::~nvhost_nvdec() = default; NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in } NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, 
std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h index 0d615bbcb..ad2233c49 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -14,11 +14,11 @@ public: ~nvhost_nvdec() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 1ab51f10b..61649aa4a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span<const u8> input, std::vector<T>& dst, std::si // Writes the data in src to an offset 
into the dst vector. The offset is specified in bytes // Returns the number of bytes written into dst. template <typename T> -std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { +std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size_t offset) { if (src.empty()) { return 0; } @@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span<const u8> input) { return NvResult::Success; } -NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, - std::vector<u8>& output) { +NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) { IoctlSubmit params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); @@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, return NvResult::Success; } -NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::span<u8> output) { IoctlGetSyncpoint params{}; std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); @@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vecto return NvResult::Success; } -NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::span<u8> output) { IoctlGetWaitbase params{}; LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); @@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector return NvResult::Success; } -NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u8>& output) { 
+NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) { IoctlMapBuffer params{}; std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); @@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u return NvResult::Success; } -NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) { IoctlMapBuffer params{}; std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); @@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector return NvResult::Success; } -NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::span<u8> output) { std::memcpy(&submit_timeout, input.data(), input.size()); LOG_WARNING(Service_NVDRV, "(STUBBED) called"); return NvResult::Success; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index 5af26a26f..9bb573bfe 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -108,12 +108,12 @@ protected: /// Ioctl command implementations NvResult SetNVMAPfd(std::span<const u8> input); - NvResult Submit(DeviceFD fd, std::span<const u8> input, std::vector<u8>& output); - NvResult GetSyncpoint(std::span<const u8> input, std::vector<u8>& output); - NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); - NvResult MapBuffer(std::span<const u8> input, std::vector<u8>& output); - NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); - NvResult 
SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output); + NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output); + NvResult GetSyncpoint(std::span<const u8> input, std::span<u8> output); + NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output); + NvResult MapBuffer(std::span<const u8> input, std::span<u8> output); + NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output); + NvResult SetSubmitTimeout(std::span<const u8> input, std::span<u8> output); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 39f30e7c8..a05c8cdae 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {} nvhost_nvjpg::~nvhost_nvjpg() = default; NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 'H': switch (command.cmd) { @@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in } NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in void 
nvhost_nvjpg::OnOpen(DeviceFD fd) {} void nvhost_nvjpg::OnClose(DeviceFD fd) {} -NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) { IoctlSetNvmapFD params{}; std::memcpy(&params, input.data(), input.size()); LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h index 41b57e872..5623e0d47 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h @@ -16,11 +16,11 @@ public: ~nvhost_nvjpg() override; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -33,7 +33,7 @@ private: s32_le nvmap_fd{}; - NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); + NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output); }; } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index b0ea402a7..c0b8684c3 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_) 
nvhost_vic::~nvhost_vic() = default; NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 0x0: switch (command.cmd) { @@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu } NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { + std::span<u8> output, std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h index b5e350a83..cadbcb0a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h @@ -13,11 +13,11 @@ public: ~nvhost_vic(); NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp 
b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 07417f045..e7f7e273b 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_) nvmap::~nvmap() = default; NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) { + std::span<u8> output) { switch (command.group) { case 0x1: switch (command.cmd) { @@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, } NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } -NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { +NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) { UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); return NvResult::NotImplemented; } @@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, void nvmap::OnOpen(DeviceFD fd) {} void nvmap::OnClose(DeviceFD fd) {} -NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocCreate(std::span<const u8> input, std::span<u8> output) { IocCreateParams params; std::memcpy(&params, input.data(), sizeof(params)); LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); @@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) { return NvResult::Success; } -NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocAlloc(std::span<const u8> input, std::span<u8> output) { 
IocAllocParams params; std::memcpy(&params, input.data(), sizeof(params)); LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); @@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) { return result; } -NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocGetId(std::span<const u8> input, std::span<u8> output) { IocGetIdParams params; std::memcpy(&params, input.data(), sizeof(params)); @@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) { return NvResult::Success; } -NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocFromId(std::span<const u8> input, std::span<u8> output) { IocFromIdParams params; std::memcpy(&params, input.data(), sizeof(params)); @@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) { return NvResult::Success; } -NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocParam(std::span<const u8> input, std::span<u8> output) { enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 }; IocParamParams params; @@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) { return NvResult::Success; } -NvResult nvmap::IocFree(std::span<const u8> input, std::vector<u8>& output) { +NvResult nvmap::IocFree(std::span<const u8> input, std::span<u8> output) { IocFreeParams params; std::memcpy(&params, input.data(), sizeof(params)); diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h index 82bd3b118..40c65b430 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.h +++ b/src/core/hle/service/nvdrv/devices/nvmap.h @@ -27,11 +27,11 @@ public: nvmap& operator=(const nvmap&) = delete; NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output) override; + 
std::span<u8> output) override; NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) override; - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output) override; + std::span<const u8> inline_input, std::span<u8> output) override; + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) override; void OnOpen(DeviceFD fd) override; void OnClose(DeviceFD fd) override; @@ -106,12 +106,12 @@ private: }; static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); - NvResult IocCreate(std::span<const u8> input, std::vector<u8>& output); - NvResult IocAlloc(std::span<const u8> input, std::vector<u8>& output); - NvResult IocGetId(std::span<const u8> input, std::vector<u8>& output); - NvResult IocFromId(std::span<const u8> input, std::vector<u8>& output); - NvResult IocParam(std::span<const u8> input, std::vector<u8>& output); - NvResult IocFree(std::span<const u8> input, std::vector<u8>& output); + NvResult IocCreate(std::span<const u8> input, std::span<u8> output); + NvResult IocAlloc(std::span<const u8> input, std::span<u8> output); + NvResult IocGetId(std::span<const u8> input, std::span<u8> output); + NvResult IocFromId(std::span<const u8> input, std::span<u8> output); + NvResult IocParam(std::span<const u8> input, std::span<u8> output); + NvResult IocFree(std::span<const u8> input, std::span<u8> output); NvCore::Container& container; NvCore::NvMap& file; diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 3d774eec4..9e46ee8dd 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) { } NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& 
output) { + std::span<u8> output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, } NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output) { + std::span<const u8> inline_input, std::span<u8> output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; @@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, return itr->second->Ioctl2(fd, command, input, inline_input, output); } -NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::vector<u8>& output, std::vector<u8>& inline_output) { +NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output) { if (fd < 0) { LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); return NvResult::InvalidState; diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h index 668be742b..d8622b3ca 100644 --- a/src/core/hle/service/nvdrv/nvdrv.h +++ b/src/core/hle/service/nvdrv/nvdrv.h @@ -80,13 +80,13 @@ public: DeviceFD Open(const std::string& device_name); /// Sends an ioctl command to the specified file descriptor. 
- NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output); + NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output); NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, - std::span<const u8> inline_input, std::vector<u8>& output); + std::span<const u8> inline_input, std::span<u8> output); - NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, - std::vector<u8>& inline_output); + NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output, + std::span<u8> inline_output); /// Closes a device file descriptor and returns operation success. NvResult Close(DeviceFD fd); diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp index d010a1e03..348207e25 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp +++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp @@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) { } // Check device - std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); const auto input_buffer = ctx.ReadBuffer(0); - const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); + const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output); if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer); + ctx.WriteBuffer(tmp_output); } IPC::ResponseBuilder rb{ctx, 3}; @@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) { const auto input_buffer = ctx.ReadBuffer(0); const auto input_inlined_buffer = ctx.ReadBuffer(1); - std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); const auto nv_result = - nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); + nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output); if 
(command.is_out != 0) { - ctx.WriteBuffer(output_buffer); + ctx.WriteBuffer(tmp_output); } IPC::ResponseBuilder rb{ctx, 3}; @@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) { } const auto input_buffer = ctx.ReadBuffer(0); - std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); - std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); - - const auto nv_result = - nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline); + tmp_output.resize_destructive(ctx.GetWriteBufferSize(0)); + tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1)); + const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline); if (command.is_out != 0) { - ctx.WriteBuffer(output_buffer, 0); - ctx.WriteBuffer(output_buffer_inline, 1); + ctx.WriteBuffer(tmp_output, 0); + ctx.WriteBuffer(tmp_output_inline, 1); } IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h index 881ea1a6b..4b593ff90 100644 --- a/src/core/hle/service/nvdrv/nvdrv_interface.h +++ b/src/core/hle/service/nvdrv/nvdrv_interface.h @@ -4,6 +4,7 @@ #pragma once #include <memory> +#include "common/scratch_buffer.h" #include "core/hle/service/nvdrv/nvdrv.h" #include "core/hle/service/service.h" @@ -33,6 +34,8 @@ private: u64 pid{}; bool is_initialized{}; + Common::ScratchBuffer<u8> tmp_output; + Common::ScratchBuffer<u8> tmp_output_inline; }; } // namespace Service::Nvidia diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index da2d5890f..b41c6240c 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -70,7 +70,8 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_ [this](std::uintptr_t, s64 time, std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> { vsync_signal.store(true); - 
vsync_signal.notify_all(); + { const auto lock_guard = Lock(); } + vsync_signal.notify_one(); return std::chrono::nanoseconds(GetNextTicks()); }); diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h index fb56d75d7..23ba315a0 100644 --- a/src/core/hle/service/nvnflinger/parcel.h +++ b/src/core/hle/service/nvnflinger/parcel.h @@ -6,6 +6,7 @@ #include <memory> #include <span> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/alignment.h" #include "common/assert.h" @@ -167,7 +168,7 @@ public: private: template <typename T> requires(std::is_trivially_copyable_v<T>) - void WriteImpl(const T& val, std::vector<u8>& buffer) { + void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) { const size_t aligned_size = Common::AlignUp(sizeof(T), 4); const size_t old_size = buffer.size(); buffer.resize(old_size + aligned_size); @@ -176,8 +177,8 @@ private: } private: - std::vector<u8> m_data_buffer; - std::vector<u8> m_object_buffer; + boost::container::small_vector<u8, 0x200> m_data_buffer; + boost::container::small_vector<u8, 0x200> m_object_buffer; }; } // namespace Service::android diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h index e6293ffb9..9fc01ea90 100644 --- a/src/core/hle/service/time/clock_types.h +++ b/src/core/hle/service/time/clock_types.h @@ -3,6 +3,8 @@ #pragma once +#include <ratio> + #include "common/common_funcs.h" #include "common/common_types.h" #include "common/uuid.h" @@ -74,18 +76,19 @@ static_assert(std::is_trivially_copyable_v<ContinuousAdjustmentTimePoint>, /// https://switchbrew.org/wiki/Glue_services#TimeSpanType struct TimeSpanType { s64 nanoseconds{}; - static constexpr s64 ns_per_second{1000000000ULL}; s64 ToSeconds() const { - return nanoseconds / ns_per_second; + return nanoseconds / std::nano::den; } static TimeSpanType FromSeconds(s64 seconds) { - return {seconds * ns_per_second}; + return {seconds 
* std::nano::den}; } - static TimeSpanType FromTicks(u64 ticks, u64 frequency) { - return FromSeconds(static_cast<s64>(ticks) / static_cast<s64>(frequency)); + template <u64 Frequency> + static TimeSpanType FromTicks(u64 ticks) { + using TicksToNSRatio = std::ratio<std::nano::den, Frequency>; + return {static_cast<s64>(ticks * TicksToNSRatio::num / TicksToNSRatio::den)}; } }; static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 3dbbb9850..5627b7003 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -10,7 +10,7 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{ - TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 27600413e..0d9fb3143 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -10,7 +10,7 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{ - TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git 
a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 868be60c5..7197ca30f 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -240,8 +240,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(HLERequestCon const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { - const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), - Core::Hardware::CNTFREQ)}; + const auto ticks{Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>( + system.CoreTiming().GetClockTicks())}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index ce1c85bcc..a00676669 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -21,8 +21,9 @@ SharedMemory::~SharedMemory() = default; void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id, Clock::TimeSpanType current_time_point) { - const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( - system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + const Clock::TimeSpanType ticks_time_span{ + Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>( + system.CoreTiming().GetClockTicks())}; const Clock::SteadyClockContext context{ static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; diff --git a/src/core/hle/service/time/time_zone_manager.cpp b/src/core/hle/service/time/time_zone_manager.cpp index e1728c06d..205371a26 100644 --- a/src/core/hle/service/time/time_zone_manager.cpp +++ b/src/core/hle/service/time/time_zone_manager.cpp @@ -849,8 +849,9 @@ static Result 
CreateCalendarTime(s64 time, int gmt_offset, CalendarTimeInternal& static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time, CalendarTimeInternal& calendar_time, CalendarAdditionalInfo& calendar_additional_info) { - if ((rules.go_ahead && time < rules.ats[0]) || - (rules.go_back && time > rules.ats[rules.time_count - 1])) { + ASSERT(rules.go_ahead ? rules.time_count > 0 : true); + if ((rules.go_back && time < rules.ats[0]) || + (rules.go_ahead && time > rules.ats[rules.time_count - 1])) { s64 seconds{}; if (time < rules.ats[0]) { seconds = rules.ats[0] - time; @@ -910,9 +911,13 @@ static Result ToCalendarTimeInternal(const TimeZoneRule& rules, s64 time, calendar_additional_info.is_dst = rules.ttis[tti_index].is_dst; const char* time_zone{&rules.chars[rules.ttis[tti_index].abbreviation_list_index]}; - for (int index{}; time_zone[index] != '\0'; ++index) { + u32 index; + for (index = 0; time_zone[index] != '\0' && time_zone[index] != ',' && + index < calendar_additional_info.timezone_name.size() - 1; + ++index) { calendar_additional_info.timezone_name[index] = time_zone[index]; } + calendar_additional_info.timezone_name[index] = '\0'; return ResultSuccess; } diff --git a/src/core/hle/service/time/time_zone_service.cpp b/src/core/hle/service/time/time_zone_service.cpp index e8273e152..8171c82a5 100644 --- a/src/core/hle/service/time/time_zone_service.cpp +++ b/src/core/hle/service/time/time_zone_service.cpp @@ -112,20 +112,14 @@ void ITimeZoneService::LoadTimeZoneRule(HLERequestContext& ctx) { LOG_DEBUG(Service_Time, "called, location_name={}", location_name); TimeZone::TimeZoneRule time_zone_rule{}; - if (const Result result{ - time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)}; - result != ResultSuccess) { - IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(result); - return; - } + const Result result{time_zone_content_manager.LoadTimeZoneRule(time_zone_rule, location_name)}; std::vector<u8> 
time_zone_rule_outbuffer(sizeof(TimeZone::TimeZoneRule)); std::memcpy(time_zone_rule_outbuffer.data(), &time_zone_rule, sizeof(TimeZone::TimeZoneRule)); ctx.WriteBuffer(time_zone_rule_outbuffer); IPC::ResponseBuilder rb{ctx, 2}; - rb.Push(ResultSuccess); + rb.Push(result); } void ITimeZoneService::ToCalendarTime(HLERequestContext& ctx) { diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index c3c2281bb..9ff4028c2 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_index + element}; - if (xfb_varying_index < runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < runtime_info.xfb_count) { xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? 
xfb_varying : nullptr; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0f86a8004..34592a01f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr } void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { - if (ctx.runtime_info.xfb_varyings.empty()) { + if (ctx.runtime_info.xfb_count == 0) { return; } ctx.AddCapability(spv::Capability::TransformFeedback); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index fd15f47ea..bec5db173 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; const size_t xfb_varying_index{base_attr_index + element}; - if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { + if (xfb_varying_index < ctx.runtime_info.xfb_count) { xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; xfb_varying = xfb_varying->components > 0 ? 
xfb_varying : nullptr; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 3b63c249f..619c0b138 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -84,7 +84,8 @@ struct RuntimeInfo { bool glasm_use_storage_buffers{}; /// Transform feedback state for each varying - std::vector<TransformFeedbackVarying> xfb_varyings; + std::array<TransformFeedbackVarying, 256> xfb_varyings{}; + u32 xfb_count{0}; }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9bafd8cc0..58a45ab67 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } - tmp_buffer.resize(amount); + tmp_buffer.resize_destructive(amount); cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); return true; @@ -719,9 +719,15 @@ void BufferCache<P>::BindHostVertexBuffers() { bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { + const Binding& binding = channel_state->vertex_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer, binding.buffer_id); + SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; } + flags[Dirty::VertexBuffer0 + index] = false; + host_bindings.min_index = std::min(host_bindings.min_index, index); host_bindings.max_index = std::max(host_bindings.max_index, index); any_valid = true; @@ -735,9 +741,6 @@ void BufferCache<P>::BindHostVertexBuffers() { const Binding& binding = channel_state->vertex_buffers[index]; Buffer& buffer = 
slot_buffers[binding.buffer_id]; - TouchBuffer(buffer, binding.buffer_id); - SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1276,7 +1279,7 @@ template <class P> typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, u32 wanted_size) { static constexpr int STREAM_LEAP_THRESHOLD = 16; - std::vector<BufferId> overlap_ids; + boost::container::small_vector<BufferId, 16> overlap_ids; VAddr begin = cpu_addr; VAddr end = cpu_addr + wanted_size; int stream_score = 0; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 63a120f7a..fe6068cfe 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; struct OverlapResult { - std::vector<BufferId> ids; + boost::container::small_vector<BufferId, 16> ids; VAddr begin; VAddr end; bool has_stream_leap = false; @@ -582,7 +582,7 @@ private: BufferId inline_buffer_id; std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; - std::vector<u8> tmp_buffer; + Common::ScratchBuffer<u8> tmp_buffer; }; } // namespace VideoCommon diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 83112dfce..7d660af47 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -63,7 +63,6 @@ struct ChCommand { }; using ChCommandHeaderList = std::vector<ChCommandHeader>; -using ChCommandList = std::vector<ChCommand>; struct ThiRegisters { u32_le increment_syncpt{}; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1cdb690ed..8a2784cdc 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -6,6 +6,7 @@ 
#include <array> #include <span> #include <vector> +#include <boost/container/small_vector.hpp> #include <queue> #include "common/bit_field.h" @@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) + explicit CommandList( + boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_) : prefetch_command_list{std::move(prefetch_command_list_)} {} - std::vector<CommandListHeader> command_lists; - std::vector<CommandHeader> prefetch_command_list; + boost::container::small_vector<CommandListHeader, 512> command_lists; + boost::container::small_vector<CommandHeader, 512> prefetch_command_list; }; /** diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 0e94c521a..f34090791 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" #include "video_core/rasterizer_interface.h" @@ -195,8 +196,12 @@ void DrawManager::DrawTexture() { if (lower_left) { draw_texture_state.dst_y0 -= dst_height; } - draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; - draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; + draw_texture_state.dst_x1 = + draw_texture_state.dst_x0 + + static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_width))); + draw_texture_state.dst_y1 = + draw_texture_state.dst_y0 + + static_cast<f32>(Settings::values.resolution_info.ScaleUp(static_cast<u32>(dst_height))); draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 
4096.f; draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; draw_texture_state.src_x1 = @@ -207,7 +212,6 @@ void DrawManager::DrawTexture() { draw_texture_state.src_y0; draw_texture_state.src_sampler = regs.draw_texture.src_sampler; draw_texture_state.src_texture = regs.draw_texture.src_texture; - maxwell3d->rasterizer->DrawTexture(); } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ebe5536de..bc1eb41e7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); + read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); + std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); + std::ranges::fill(span, regs.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast<u8*>(tmp_buffer.data()), + reinterpret_cast<u8*>(read_buffer.data()), regs.line_length_in * sizeof(u32)); } else { memory_manager.FlushCaching(); @@ -126,32 +128,32 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector<u8> tmp_buffer(16); + read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { memory_manager.ReadBlockUnsafe( convert_linear_2_blocklinear_addr(regs.offset_in + offset), - tmp_buffer.data(), tmp_buffer.size()); - memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + 
offset, read_buffer.data(), + read_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); - std::vector<u8> tmp_buffer(16); + read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), - tmp_buffer.size()); + memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), + read_buffer.size()); memory_manager.WriteBlockCached( convert_linear_2_blocklinear_addr(regs.offset_out + offset), - tmp_buffer.data(), tmp_buffer.size()); + read_buffer.data(), read_buffer.size()); } } else { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector<u8> tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), + read_buffer.resize_destructive(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), regs.line_length_in); - memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), + memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), regs.line_length_in); } } @@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { src_operand.address = regs.offset_in; DMA::BufferOperand dst_operand; - dst_operand.pitch = regs.pitch_out; + u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); + dst_operand.pitch = abs_pitch_out; dst_operand.width = regs.line_length_in; dst_operand.height = regs.line_count; dst_operand.address = regs.offset_out; @@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; + const size_t dst_size = static_cast<size_t>(abs_pitch_out) * 
regs.line_count; read_buffer.resize_destructive(src_size); write_buffer.resize_destructive(dst_size); @@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - regs.pitch_out); + abs_pitch_out); memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 456f733cf..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -193,18 +193,13 @@ struct GPU::Impl { } [[nodiscard]] u64 GetTicks() const { - // This values were reversed engineered by fincs from NVN - // The gpu clock is reported in units of 385/625 nanoseconds - constexpr u64 gpu_ticks_num = 384; - constexpr u64 gpu_ticks_den = 625; + u64 gpu_tick = system.CoreTiming().GetGPUTicks(); - u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { - nanoseconds /= 256; + gpu_tick /= 256; } - const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; - const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; - return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; + + return gpu_tick; } [[nodiscard]] bool IsAsync() const { diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 6ce179167..ce827eb6c 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -4,6 +4,7 @@ #include <array> #include <bit> +#include "common/scratch_buffer.h" #include "common/settings.h" #include "video_core/host1x/codecs/h264.h" #include "video_core/host1x/host1x.h" @@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) { } void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { - std::vector<u8> scan(count); + static Common::ScratchBuffer<u8> 
scan{}; + scan.resize_destructive(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); } else { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7b2cde7a7..45141e488 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); SetEntry<false>(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); } if constexpr (entry_type == EntryType::Mapped) { const VAddr current_cpu_addr = cpu_addr + offset; @@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); SetEntry<true>(current_gpu_addr, entry_type); if (current_entry_type != entry_type) { - rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); + rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); } if constexpr (entry_type == EntryType::Mapped) { const VAddr current_cpu_addr = cpu_addr + offset; @@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, VideoCommon::CacheType which) { - std::vector<u8> tmp_buffer(size); + tmp_buffer.resize_destructive(size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. 
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons return result; } -std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( - GPUVAddr gpu_addr, std::size_t size) const { - std::vector<std::pair<GPUVAddr, std::size_t>> result{}; +boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> +MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const { + boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{}; GetSubmappedRangeImpl<true>(gpu_addr, size, result); return result; } @@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( template <bool is_gpu_address> void MemoryManager::GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& - result) const { + boost::container::small_vector< + std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) + const { std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> last_segment{}; std::optional<VAddr> old_page_addr{}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 794535122..4202c26ff 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -8,10 +8,12 @@ #include <mutex> #include <optional> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/common_types.h" #include "common/multi_level_page_table.h" #include "common/range_map.h" +#include "common/scratch_buffer.h" #include "common/virtual_buffer.h" #include "video_core/cache_types.h" #include "video_core/pte_kind.h" @@ -107,8 +109,8 @@ public: * if the region is continuous, a single pair will be returned. 
If it's unmapped, an empty * vector will be returned; */ - std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, - std::size_t size) const; + boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const; GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); @@ -165,7 +167,8 @@ private: template <bool is_gpu_address> void GetSubmappedRangeImpl( GPUVAddr gpu_addr, std::size_t size, - std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& + boost::container::small_vector< + std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) const; Core::System& system; @@ -215,8 +218,8 @@ private: Common::VirtualBuffer<u32> big_page_table_cpu; std::vector<u64> big_page_continuous; - std::vector<std::pair<VAddr, std::size_t>> page_stash{}; - std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; + boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; + boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; mutable std::mutex guard; @@ -226,6 +229,8 @@ private: std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; static std::atomic<size_t> unique_identifier_generator; + + Common::ScratchBuffer<u8> tmp_buffer; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 1a0cea9b7..3151c0db8 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -87,7 +87,8 @@ void ComputePipeline::Configure() { texture_cache.SynchronizeComputeDescriptors(); boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + 
boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers; + std::array<GLuint, MAX_TEXTURES> gl_samplers; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; GLsizei sampler_binding{}; @@ -131,7 +132,6 @@ void ComputePipeline::Configure() { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - samplers[sampler_binding++] = 0; } } for (const auto& desc : info.image_buffer_descriptors) { @@ -142,8 +142,8 @@ void ComputePipeline::Configure() { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -186,10 +186,17 @@ void ComputePipeline::Configure() { const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + num_image_buffers}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; texture_binding += num_texture_buffers; image_binding += num_image_buffers; u32 texture_scaling_mask{}; + + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -198,6 +205,12 @@ void ComputePipeline::Configure() { texture_scaling_mask |= 1u << texture_binding; } ++texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? 
sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } u32 image_scaling_mask{}; @@ -228,7 +241,7 @@ void ComputePipeline::Configure() { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 89000d6e0..c58f760b8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -275,9 +275,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views; - std::array<GLuint, MAX_TEXTURES> samplers; + std::array<VideoCommon::SamplerId, MAX_TEXTURES> samplers; size_t views_index{}; - GLsizei sampler_binding{}; + size_t samplers_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -337,7 +337,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - samplers[sampler_binding++] = 0; } } } @@ -351,8 +350,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[samplers_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -445,10 +444,13 @@ void 
GraphicsPipeline::ConfigureImpl(bool is_indexed) { program_manager.BindSourcePrograms(source_programs); } const VideoCommon::ImageViewInOut* views_it{views.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; + GLsizei sampler_binding{}; std::array<GLuint, MAX_TEXTURES> textures; std::array<GLuint, MAX_IMAGES> images; + std::array<GLuint, MAX_TEXTURES> gl_samplers; const auto prepare_stage{[&](size_t stage) { buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); buffer_cache.BindHostStageBuffers(stage); @@ -465,6 +467,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { u32 stage_image_binding{}; const auto& info{stage_infos[stage]}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + gl_samplers[sampler_binding++] = 0; + } + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView((views_it++)->id)}; @@ -474,6 +483,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } ++texture_binding; ++stage_texture_binding; + + const Sampler& sampler{texture_cache.GetSampler(*(samplers_it++))}; + const bool use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + gl_samplers[sampler_binding++] = + use_fallback_sampler ? 
sampler.HandleWithDefaultAnisotropy() : sampler.Handle(); } } for (const auto& desc : info.image_descriptors) { @@ -534,7 +549,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (texture_binding != 0) { ASSERT(texture_binding == sampler_binding); glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); + glBindSamplers(0, sampler_binding, gl_samplers.data()); } if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3f077311e..0329ed820 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, case Shader::Stage::VertexB: case Shader::Stage::Geometry: if (!use_assembly_shaders && key.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } break; case Shader::Stage::TessellationEval: diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 1c5dbcdd8..3b446be07 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1268,36 +1268,48 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1); - sampler.Create(); - const GLuint handle = sampler.handle; - glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); - glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p)); - 
glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); - glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); - glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); - glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); - glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); - glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); - glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); - glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); - - if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { - const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); - glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy); - } else { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); - } - if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { - glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); - } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { - LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); - } - if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { - glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); - } else if (seamless == GL_FALSE) { - // We default to false because it's more common - LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + const f32 max_anisotropy = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f); + + const auto create_sampler = [&](const f32 anisotropy) { + OGLSampler new_sampler; + new_sampler.Create(); + const GLuint handle = new_sampler.handle; + glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v)); + glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, 
MaxwellToGL::WrapMode(config.wrap_p)); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode); + glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func); + glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag); + glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min); + glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias()); + glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod()); + glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod()); + glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data()); + + if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) { + glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, anisotropy); + } else { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required"); + } + if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) { + glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter); + } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) { + LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required"); + } + if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) { + glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless); + } else if (seamless == GL_FALSE) { + // We default to false because it's more common + LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required"); + } + return new_sampler; + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << config.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); } } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1148b73d7..3676eaaa9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ 
b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -309,12 +309,21 @@ class Sampler { public: explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&); - GLuint Handle() const noexcept { + [[nodiscard]] GLuint Handle() const noexcept { return sampler.handle; } + [[nodiscard]] GLuint HandleWithDefaultAnisotropy() const noexcept { + return sampler_default_anisotropy.handle; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy.handle); + } + private: OGLSampler sampler; + OGLSampler sampler_default_anisotropy; }; class Framebuffer { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 983e1c2e1..71c783709 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -178,7 +178,7 @@ public: inline void PushImageDescriptors(TextureCache& texture_cache, GuestDescriptorQueue& guest_descriptor_queue, const Shader::Info& info, RescalingPushConstant& rescaling, - const VkSampler*& samplers, + const VideoCommon::SamplerId*& samplers, const VideoCommon::ImageViewInOut*& views) { const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); @@ -187,10 +187,15 @@ inline void PushImageDescriptors(TextureCache& texture_cache, for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const VideoCommon::ImageViewId image_view_id{(views++)->id}; - const VkSampler sampler{*(samplers++)}; + const VideoCommon::SamplerId sampler_id{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(image_view_id)}; const VkImageView vk_image_view{image_view.Handle(desc.type)}; - guest_descriptor_queue.AddSampledImage(vk_image_view, sampler); + const Sampler& sampler{texture_cache.GetSampler(sampler_id)}; + const bool 
use_fallback_sampler{sampler.HasAddedAnisotropy() && + !image_view.SupportsAnisotropy()}; + const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() + : sampler.Handle()}; + guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8c33722d3..f47301ad5 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, }; // Measuring a popular game, this number never exceeds the specified size once data is warmed up - boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); + boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { @@ -516,15 +516,15 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi buffer_handles.push_back(handle); } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings = bindings, - buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers2EXT( bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data()); }); } else { - scheduler.Record([bindings = bindings, - buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { + scheduler.Record([bindings = 
std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), bindings.offsets.data()); }); @@ -561,12 +561,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< for (u32 index = 0; index < bindings.buffers.size(); ++index) { buffer_handles.push_back(bindings.buffers[index]->Handle()); } - scheduler.Record( - [bindings = bindings, buffer_handles = buffer_handles](vk::CommandBuffer cmdbuf) { - cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), - buffer_handles.data(), bindings.offsets.data(), - bindings.sizes.data()); - }); + scheduler.Record([bindings = std::move(bindings), + buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), + buffer_handles.data(), bindings.offsets.data(), + bindings.sizes.data()); + }); } void BufferCacheRuntime::ReserveNullBuffer() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 733e70d9d..73e585c2b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -115,7 +115,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, static constexpr size_t max_elements = 64; boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views; - boost::container::static_vector<VkSampler, max_elements> samplers; + boost::container::static_vector<VideoCommon::SamplerId, max_elements> samplers; const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; @@ -160,8 +160,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto handle{read_handle(desc, index)}; 
views.push_back({handle.first}); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers.push_back(sampler->Handle()); + VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { @@ -192,7 +192,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.BindHostComputeBuffers(); RescalingPushConstant rescaling; - const VkSampler* samplers_it{samplers.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; PushImageDescriptors(texture_cache, guest_descriptor_queue, info, rescaling, samplers_it, views_it); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 506b78f08..c1595642e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -298,7 +298,7 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template <typename Spec> void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array<VideoCommon::ImageViewInOut, MAX_IMAGE_ELEMENTS> views; - std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; + std::array<VideoCommon::SamplerId, MAX_IMAGE_ELEMENTS> samplers; size_t sampler_index{}; size_t view_index{}; @@ -367,8 +367,8 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[view_index++] = {handle.first}; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_index++] = sampler->Handle(); + VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + samplers[sampler_index++] = sampler; } } if constexpr (Spec::has_images) { @@ -453,7 +453,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { RescalingPushConstant 
rescaling; RenderAreaPushConstant render_area; - const VkSampler* samplers_it{samplers.data()}; + const VideoCommon::SamplerId* samplers_it{samplers.data()}; const VideoCommon::ImageViewInOut* views_it{views.data()}; const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index b128c4f6e..5eeda08d2 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -3,6 +3,7 @@ #include <thread> +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_device.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 18e040a1b..9f316113c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; } @@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + auto [varyings, count] = + VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); + info.xfb_varyings = varyings; + info.xfb_count = count; } info.convert_depth_mode = gl_ndc; 
break; @@ -705,10 +711,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, PipelineStatistics* statistics, bool build_in_parallel) try { - // TODO: Remove this when Intel fixes their shader compiler. - // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159 - if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS && - !Settings::values.enable_compute_pipelines.GetValue()) { + if (device.HasBrokenCompute()) { LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8711e2a87..f3cef09dd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } -[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( - std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { - std::vector<VkBufferCopy> result(copies.size()); +[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16> +TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { + boost::container::small_vector<VkBufferCopy, 16> result(copies.size()); std::ranges::transform( copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { return VkBufferCopy{ @@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { return result; } -[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( +[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies( std::span<const BufferImageCopy> copies, size_t 
buffer_offset, VkImageAspectFlags aspect_mask) { struct Maker { VkBufferImageCopy operator()(const BufferImageCopy& copy) const { @@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { VkImageAspectFlags aspect_mask; }; if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - std::vector<VkBufferImageCopy> result(copies.size() * 2); + boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); std::ranges::transform(copies, result.begin() + copies.size(), Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); return result; } else { - std::vector<VkBufferImageCopy> result(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size()); std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); return result; } @@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() { void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); - std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size()); + boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size()); const VkImageAspectFlags src_aspect_mask = src.AspectMask(); const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); @@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im void TextureCacheRuntime::CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) { - std::vector<VkImageCopy> vk_copies(copies.size()); + boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size()); const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == 
src.AspectMask()); @@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, ScaleDown(true); } scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); + auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); const VkBuffer src_buffer = buffer; const VkImage vk_image = *original_image; const VkImageAspectFlags vk_aspect_mask = aspect_mask; @@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS if (is_rescaled) { ScaleDown(); } - boost::container::small_vector<VkBuffer, 1> buffers_vector{}; - boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; + boost::container::small_vector<VkBuffer, 8> buffers_vector{}; + boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> + vk_copies; for (size_t index = 0; index < buffers_span.size(); index++) { buffers_vector.emplace_back(buffers_span[index]); vk_copies.emplace_back( @@ -1802,27 +1803,36 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t // Some games have samplers with garbage. Sanitize them here. const f32 max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f); - sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = pnext, - .flags = 0, - .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), - .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), - .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), - .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), - .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), - .mipLodBias = tsc.LodBias(), - .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? 
VK_TRUE : VK_FALSE), - .maxAnisotropy = max_anisotropy, - .compareEnable = tsc.depth_compare_enabled, - .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), - .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), - .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), - .borderColor = - arbitrary_borders ? VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), - .unnormalizedCoordinates = VK_FALSE, - }); + const auto create_sampler = [&](const f32 anisotropy) { + return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = pnext, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.LodBias(), + .anisotropyEnable = static_cast<VkBool32>(anisotropy > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = anisotropy, + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(), + .borderColor = + arbitrary_borders ? 
VK_BORDER_COLOR_FLOAT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); + }; + + sampler = create_sampler(max_anisotropy); + + const f32 max_anisotropy_default = static_cast<f32>(1U << tsc.max_anisotropy); + if (max_anisotropy > max_anisotropy_default) { + sampler_default_anisotropy = create_sampler(max_anisotropy_default); + } } Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, @@ -1849,7 +1859,7 @@ Framebuffer::~Framebuffer() = default; void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, ImageView* depth_buffer, bool is_rescaled) { - std::vector<VkImageView> attachments; + boost::container::small_vector<VkImageView, NUM_RT + 1> attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0f7a5ffd4..f14525dcb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -279,8 +279,17 @@ public: return *sampler; } + [[nodiscard]] VkSampler HandleWithDefaultAnisotropy() const noexcept { + return *sampler_default_anisotropy; + } + + [[nodiscard]] bool HasAddedAnisotropy() const noexcept { + return static_cast<bool>(sampler_default_anisotropy); + } + private: vk::Sampler sampler; + vk::Sampler sampler_default_anisotropy; }; class Framebuffer { diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index c5213875b..4db948b6d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() { marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), marked_for_removal.end()); - std::vector<ShaderInfo*> removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); + 
boost::container::small_vector<ShaderInfo*, 16> removed_shaders; std::scoped_lock lock{lookup_mutex}; - for (Entry* const entry : marked_for_removal) { removed_shaders.push_back(entry->data); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -6,6 +6,7 @@ #include <array> #include <optional> #include <vector> +#include <boost/container/small_vector.hpp> #include "common/common_funcs.h" #include "common/common_types.h" @@ -108,8 +109,8 @@ struct ImageBase { std::vector<ImageViewInfo> image_view_infos; std::vector<ImageViewId> image_view_ids; - std::vector<u32> slice_offsets; - std::vector<SubresourceBase> slice_subresources; + boost::container::small_vector<u32, 16> slice_offsets; + boost::container::small_vector<SubresourceBase, 16> slice_subresources; std::vector<AliasedImage> aliased_images; std::vector<ImageId> overlapping_images; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index d134b6738..0c5f4450d 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -45,4 +45,56 @@ ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_in ImageViewBase::ImageViewBase(const NullImageViewParams&) : image_id{NULL_IMAGE_ID} {} +bool ImageViewBase::SupportsAnisotropy() const noexcept { + const bool has_mips = range.extent.levels > 1; + const bool is_2d = type == ImageViewType::e2D || type == ImageViewType::e2DArray; + if (!has_mips || !is_2d) { + return false; + } + + switch (format) { + case PixelFormat::R8_UNORM: + case PixelFormat::R8_SNORM: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::BC4_UNORM: + case PixelFormat::BC4_SNORM: + case PixelFormat::BC5_UNORM: + case PixelFormat::BC5_SNORM: + case 
PixelFormat::R32G32_FLOAT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R32_FLOAT: + case PixelFormat::R16_FLOAT: + case PixelFormat::R16_UNORM: + case PixelFormat::R16_SNORM: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UNORM: + case PixelFormat::R16G16_FLOAT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R16G16_SNORM: + case PixelFormat::R8G8_UNORM: + case PixelFormat::R8G8_SNORM: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + case PixelFormat::G4R4_UNORM: + // Depth formats + case PixelFormat::D32_FLOAT: + case PixelFormat::D16_UNORM: + // Stencil formats + case PixelFormat::S8_UINT: + // DepthStencil formats + case PixelFormat::D24_UNORM_S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + case PixelFormat::D32_FLOAT_S8_UINT: + return false; + default: + return true; + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index a25ae1d4a..87549ffff 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -33,6 +33,8 @@ struct ImageViewBase { return type == ImageViewType::Buffer; } + [[nodiscard]] bool SupportsAnisotropy() const noexcept; + ImageId image_id{}; GPUVAddr gpu_addr = 0; PixelFormat format{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c7f7448e9..d3f03a995 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { template <class P> void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { + if (!Settings::values.barrier_feedback_loops.GetValue()) { + return; + } + const bool 
requires_barrier = [&] { for (const auto& view : views) { if (!view.id) { @@ -222,30 +226,50 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { template <class P> typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { + return &slot_samplers[GetGraphicsSamplerId(index)]; +} + +template <class P> +typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { + return &slot_samplers[GetComputeSamplerId(index)]; +} + +template <class P> +SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) { if (index > channel_state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; + return NULL_SAMPLER_ID; } const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } - return &slot_samplers[id]; + return id; } template <class P> -typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { +SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) { if (index > channel_state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; + return NULL_SAMPLER_ID; } const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); SamplerId& id = channel_state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } - return &slot_samplers[id]; + return id; +} + +template <class P> +const typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) const noexcept { + return slot_samplers[id]; +} + +template <class P> +typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept { + return slot_samplers[id]; } template <class P> @@ -280,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { } template <class P> -bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { +bool 
TextureCache<P>::RescaleRenderTargets() { auto& flags = maxwell3d->dirty.flags; u32 scale_rating = 0; bool rescaled = false; @@ -318,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; if (flags[Dirty::ColorBuffer0 + index] || force) { flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + BindRenderTarget(&color_buffer_id, FindColorBuffer(index)); } check_rescale(color_buffer_id, tmp_color_images[index]); } if (flags[Dirty::ZetaBuffer] || force) { flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer()); } check_rescale(render_targets.depth_buffer_id, tmp_depth_image); @@ -389,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { return; } - const bool rescaled = RescaleRenderTargets(is_clear); + const bool rescaled = RescaleRenderTargets(); if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; @@ -502,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> images; + boost::container::small_vector<ImageId, 16> images; ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { return; @@ -555,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V template <class P> void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> deleted_images; + boost::container::small_vector<ImageId, 16> deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = 
slot_images[id]; @@ -569,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { template <class P> void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { - std::vector<ImageId> deleted_images; + boost::container::small_vector<ImageId, 16> deleted_images; ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { @@ -1077,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, const bool native_bgr = runtime.HasNativeBgr(); const bool flexible_formats = True(options & RelaxedOptions::Format); ImageId image_id{}; - boost::container::small_vector<ImageId, 1> image_ids; + boost::container::small_vector<ImageId, 8> image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1598,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) } } ImageId image_id{}; - boost::container::small_vector<ImageId, 1> image_ids; + boost::container::small_vector<ImageId, 8> image_ids; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { if (True(existing_image.flags & ImageFlagBits::Remapped)) { return false; @@ -1658,7 +1682,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { } template <class P> -ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { +ImageViewId TextureCache<P>::FindColorBuffer(size_t index) { const auto& regs = maxwell3d->regs; if (index >= regs.rt_control.count) { return ImageViewId{}; @@ -1672,11 +1696,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } 
template <class P> -ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { +ImageViewId TextureCache<P>::FindDepthBuffer() { const auto& regs = maxwell3d->regs; if (!regs.zeta_enable) { return ImageViewId{}; @@ -1686,18 +1710,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { return ImageViewId{}; } const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); - return FindRenderTargetView(info, gpu_addr, is_clear); + return FindRenderTargetView(info, gpu_addr); } template <class P> -ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; +ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) { ImageId image_id{}; bool delete_state = has_deleted_images; do { has_deleted_images = false; - image_id = FindOrInsertImage(info, gpu_addr, options); + image_id = FindOrInsertImage(info, gpu_addr); delete_state |= has_deleted_images; } while (has_deleted_images); has_deleted_images = delete_state; @@ -1920,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { image.map_view_id = map_id; return; } - std::vector<ImageViewId> sparse_maps{}; + boost::container::small_vector<ImageViewId, 16> sparse_maps; ForEachSparseSegment( image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); @@ -2195,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { template <class P> void TextureCache<P>::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector<const AliasedImage*, 1> aliased_images; + boost::container::small_vector<const AliasedImage*, 8> aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); bool any_modified = True(image.flags & 
ImageFlagBits::GpuModified); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 3bfa92154..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -56,7 +56,7 @@ struct ImageViewInOut { struct AsyncDecodeContext { ImageId image_id; Common::ScratchBuffer<u8> decoded_data; - std::vector<BufferImageCopy> copies; + boost::container::small_vector<BufferImageCopy, 16> copies; std::mutex mutex; std::atomic_bool complete; }; @@ -159,6 +159,18 @@ public: /// Get the sampler from the compute descriptor table in the specified index Sampler* GetComputeSampler(u32 index); + /// Get the sampler id from the graphics descriptor table in the specified index + SamplerId GetGraphicsSamplerId(u32 index); + + /// Get the sampler id from the compute descriptor table in the specified index + SamplerId GetComputeSamplerId(u32 index); + + /// Return a constant reference to the given sampler id + [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; + + /// Return a reference to the given sampler id + [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept; + /// Refresh the state for graphics image view and sampler descriptors void SynchronizeGraphicsDescriptors(); @@ -166,9 +178,8 @@ public: void SynchronizeComputeDescriptors(); /// Updates the Render Targets if they can be rescaled - /// @param is_clear True when the render targets are being used for clears /// @retval True if the Render Targets have been rescaled. 
- bool RescaleRenderTargets(bool is_clear); + bool RescaleRenderTargets(); /// Update bound render targets and upload memory if necessary /// @param is_clear True when the render targets are being used for clears @@ -324,14 +335,13 @@ private: [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + [[nodiscard]] ImageViewId FindColorBuffer(size_t index); /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + [[nodiscard]] ImageViewId FindDepthBuffer(); /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr); /// Iterates over all the images in a region calling func template <typename Func> @@ -419,7 +429,7 @@ private: std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; - std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; VAddr virtual_invalid_space{}; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..f781cb7a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -329,13 +329,13 @@ template <u32 GOB_EXTENT> [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { - const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); + const auto slice_offsets = CalculateSliceOffsets(new_info); 
const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); const auto it = std::ranges::find(slice_offsets, diff); if (it == slice_offsets.end()) { return std::nullopt; } - const std::vector subresources = CalculateSliceSubresources(new_info); + const auto subresources = CalculateSliceSubresources(new_info); const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; const ImageInfo& info = overlap.info; if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { return sizes; } -std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { +boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector<u32> offsets; + boost::container::small_vector<u32, 16> offsets; offsets.reserve(NumSlices(info)); const LevelInfo level_info = MakeLevelInfo(info); @@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { return offsets; } -std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { +boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( + const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); - std::vector<SubresourceBase> subresources; + boost::container::small_vector<SubresourceBase, 16> subresources; subresources.reserve(NumSlices(info)); for (s32 level = 0; level < info.resources.levels; ++level) { const s32 depth = AdjustMipSize(info.size.depth, level); @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { } } -std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, - SubresourceBase base, u32 up_scale, u32 down_shift) { +boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, + const ImageInfo& src, + SubresourceBase base, + u32 up_scale, u32 down_shift) { 
ASSERT(dst.resources.levels >= src.resources.levels); const bool is_dst_3d = dst.type == ImageType::e3D; @@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn ASSERT(src.resources.levels == 1); } const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; - std::vector<ImageCopy> copies; + boost::container::small_vector<ImageCopy, 16> copies; copies.reserve(src.resources.levels); for (s32 level = 0; level < src.resources.levels; ++level) { ImageCopy& copy = copies.emplace_back(); @@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, - u32 down_shift) { - std::vector<ImageCopy> copies; +boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, + u32 up_scale, + u32 down_shift) { + boost::container::small_vector<ImageCopy, 16> copies; copies.reserve(src.resources.levels); const bool is_3d = src.type == ImageType::e3D; for (s32 level = 0; level < src.resources.levels; ++level) { @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } -std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageInfo& info, std::span<const u8> input, - std::span<u8> output) { +boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, + GPUVAddr gpu_addr, + const ImageInfo& info, + std::span<const u8> input, + std::span<u8> output) { const size_t guest_size_bytes = input.size_bytes(); const u32 bpp_log2 = BytesPerBlockLog2(info.format); const Extent3D size = info.size; @@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP info.tile_width_spacing); size_t guest_offset = 0; u32 
host_offset = 0; - std::vector<BufferImageCopy> copies(num_levels); + boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); @@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 } } -std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { +boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { const Extent3D size = info.size; const u32 bytes_per_block = BytesPerBlock(info.format); if (info.type == ImageType::Linear) { @@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { u32 host_offset = 0; - std::vector<BufferImageCopy> copies(num_levels); + boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D level_size = AdjustMipSize(size, level); const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { return AdjustMipBlockSize(num_tiles, level_info.block, level); } -std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { +boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { const Extent2D tile_size = DefaultBlockSize(info.format); if (info.type == ImageType::Linear) { - return std::vector{SwizzleParameters{ + return {SwizzleParameters{ .num_tiles = AdjustTileSize(info.size, tile_size), .block = {}, .buffer_offset = 0, @@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { const s32 num_levels = info.resources.levels; u32 guest_offset = 0; - std::vector<SwizzleParameters> params(num_levels); + boost::container::small_vector<SwizzleParameters, 16> params(num_levels); for (s32 level = 0; level < num_levels; ++level) { const Extent3D 
level_size = AdjustMipSize(size, level); const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -5,6 +5,7 @@ #include <optional> #include <span> +#include <boost/container/small_vector.hpp> #include "common/common_types.h" #include "common/scratch_buffer.h" @@ -40,9 +41,10 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); -[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( + const ImageInfo& info); [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); @@ -51,21 +53,18 @@ struct OverlapResult { [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; -[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, - const ImageInfo& src, - SubresourceBase base, u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( + const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, + u32 down_shift = 0); -[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, - u32 up_scale = 1, - u32 down_shift = 0); +[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( + const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); -[[nodiscard]] std::vector<BufferImageCopy> 
UnswizzleImage(Tegra::MemoryManager& gpu_memory, - GPUVAddr gpu_addr, const ImageInfo& info, - std::span<const u8> input, - std::span<u8> output); +[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( + Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, + std::span<const u8> input, std::span<u8> output); [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageBase& image, std::span<u8> output); @@ -73,13 +72,15 @@ struct OverlapResult { void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies); -[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( + const ImageInfo& info); [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); -[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); +[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( + const ImageInfo& info); void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span<const BufferImageCopy> copies, std::span<const u8> memory, diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4a80a59f9..d8b88d9bc 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp @@ -62,7 +62,12 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept { } float TSCEntry::MaxAnisotropy() const noexcept { - if (max_anisotropy == 0 && mipmap_filter != TextureMipmapFilter::Linear) { + const bool is_suitable_mipmap_filter = mipmap_filter != TextureMipmapFilter::None; + const bool has_regular_lods = min_lod_clamp == 0 && max_lod_clamp >= 256; + const bool is_bilinear_filter = min_filter == 
TextureFilter::Linear && + reduction_filter == SamplerReduction::WeightedAverage; + if (max_anisotropy == 0 && (!is_suitable_mipmap_filter || !has_regular_lods || + !is_bilinear_filter || depth_compare_enabled)) { return 1.0f; } const auto anisotropic_settings = Settings::values.max_anisotropy.GetValue(); diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 155599316..1f353d2df 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -13,7 +13,7 @@ namespace VideoCommon { -std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( +std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state) { static constexpr std::array VECTORS{ 28U, // gl_Position @@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 216U, // gl_TexCoord[6] 220U, // gl_TexCoord[7] }; - std::vector<Shader::TransformFeedbackVarying> xfb(256); + std::array<Shader::TransformFeedbackVarying, 256> xfb{}; + u32 count{0}; for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { const auto& locations = state.varyings[buffer]; const auto& layout = state.layouts[buffer]; @@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( } } xfb[attribute] = varying; + count = std::max(count, attribute); highest = std::max(highest, (base_offset + varying.components) * 4); } UNIMPLEMENTED_IF(highest != layout.stride); } - return xfb; + return {xfb, count + 1}; } } // namespace VideoCommon diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index d13eb16c3..401b1352a 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -24,7 +24,7 @@ struct TransformFeedbackState { varyings; }; -std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( const TransformFeedbackState& state); } // namespace VideoCommon diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 3d2e9a16a..b11abe311 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, std::vector<const char*> ExtensionListForVulkan( const std::set<std::string, std::less<>>& extensions) { std::vector<const char*> output; + output.reserve(extensions.size()); for (const auto& extension : extensions) { output.push_back(extension.c_str()); } @@ -562,6 +563,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); cant_blit_msaa = true; } + has_broken_compute = + CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) && + !Settings::values.enable_compute_pipelines.GetValue(); if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); must_emulate_bgr565 = true; @@ -783,9 +787,6 @@ bool Device::GetSuitability(bool requires_swapchain) { FOR_EACH_VK_FEATURE_EXT(FEATURE_EXTENSION); FOR_EACH_VK_EXTENSION(EXTENSION); -#ifdef _WIN32 - FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); -#endif #undef FEATURE_EXTENSION #undef EXTENSION @@ -804,11 +805,6 @@ bool Device::GetSuitability(bool requires_swapchain) { FOR_EACH_VK_RECOMMENDED_EXTENSION(LOG_EXTENSION); FOR_EACH_VK_MANDATORY_EXTENSION(CHECK_EXTENSION); -#ifdef _WIN32 - FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(CHECK_EXTENSION); -#else - FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(CHECK_EXTENSION); -#endif if (requires_swapchain) { CHECK_EXTENSION(VK_KHR_SWAPCHAIN_EXTENSION_NAME); diff --git 
a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index f314d0ffe..0b634a876 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -10,6 +10,7 @@ #include <vector> #include "common/common_types.h" +#include "common/logging/log.h" #include "common/settings.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -68,7 +69,6 @@ EXTENSION(EXT, VERTEX_ATTRIBUTE_DIVISOR, vertex_attribute_divisor) \ EXTENSION(KHR, DRAW_INDIRECT_COUNT, draw_indirect_count) \ EXTENSION(KHR, DRIVER_PROPERTIES, driver_properties) \ - EXTENSION(KHR, EXTERNAL_MEMORY_FD, external_memory_fd) \ EXTENSION(KHR, PUSH_DESCRIPTOR, push_descriptor) \ EXTENSION(KHR, SAMPLER_MIRROR_CLAMP_TO_EDGE, sampler_mirror_clamp_to_edge) \ EXTENSION(KHR, SHADER_FLOAT_CONTROLS, shader_float_controls) \ @@ -80,9 +80,6 @@ EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) -#define FOR_EACH_VK_EXTENSION_WIN32(EXTENSION) \ - EXTENSION(KHR, EXTERNAL_MEMORY_WIN32, external_memory_win32) - // Define extensions which must be supported. #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME) \ @@ -90,12 +87,6 @@ EXTENSION_NAME(VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME) \ EXTENSION_NAME(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME) -#define FOR_EACH_VK_MANDATORY_EXTENSION_GENERIC(EXTENSION_NAME) \ - EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME) - -#define FOR_EACH_VK_MANDATORY_EXTENSION_WIN32(EXTENSION_NAME) \ - EXTENSION_NAME(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME) - // Define extensions where the absence of the extension may result in a degraded experience. 
#define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME) \ @@ -528,6 +519,11 @@ public: return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); } + /// @returns True if compute pipelines can cause crashing. + bool HasBrokenCompute() const { + return has_broken_compute; + } + /// Returns true when the device does not properly support cube compatibility. bool HasBrokenCubeImageCompability() const { return has_broken_cube_compatibility; @@ -589,6 +585,22 @@ public: return supports_conditional_barriers; } + [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, + u32 driver_version) { + if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + const u32 major = VK_API_VERSION_MAJOR(driver_version); + const u32 minor = VK_API_VERSION_MINOR(driver_version); + const u32 patch = VK_API_VERSION_PATCH(driver_version); + if (major == 0 && minor == 405 && patch < 286) { + LOG_WARNING( + Render_Vulkan, + "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute"); + return true; + } + } + return false; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -636,7 +648,6 @@ private: FOR_EACH_VK_FEATURE_1_3(FEATURE); FOR_EACH_VK_FEATURE_EXT(FEATURE); FOR_EACH_VK_EXTENSION(EXTENSION); - FOR_EACH_VK_EXTENSION_WIN32(EXTENSION); #undef EXTENSION #undef FEATURE @@ -683,6 +694,7 @@ private: bool is_integrated{}; ///< Is GPU an iGPU. bool is_virtual{}; ///< Is GPU a virtual GPU. bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. 
+ bool has_broken_compute{}; ///< Compute shaders can cause crashes bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index 84d9ca796..733c296e4 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -210,6 +210,8 @@ add_executable(yuzu util/url_request_interceptor.h util/util.cpp util/util.h + vk_device_info.cpp + vk_device_info.h compatdb.cpp compatdb.h yuzu.qrc diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index bac9dff90..edc206a25 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -761,6 +761,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); ReadGlobalSetting(Settings::values.enable_compute_pipelines); ReadGlobalSetting(Settings::values.use_video_framerate); + ReadGlobalSetting(Settings::values.barrier_feedback_loops); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1417,6 +1418,7 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); WriteGlobalSetting(Settings::values.enable_compute_pipelines); WriteGlobalSetting(Settings::values.use_video_framerate); + WriteGlobalSetting(Settings::values.barrier_feedback_loops); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 8e76a819a..bdf83ebfe 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -6,6 +6,7 @@ #include "common/settings.h" #include "core/core.h" #include 
"ui_configure.h" +#include "vk_device_info.h" #include "yuzu/configuration/config.h" #include "yuzu/configuration/configure_audio.h" #include "yuzu/configuration/configure_cpu.h" @@ -28,6 +29,7 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, InputCommon::InputSubsystem* input_subsystem, + std::vector<VkDeviceInfo::Record>& vk_device_records, Core::System& system_, bool enable_web_config) : QDialog(parent), ui{std::make_unique<Ui::ConfigureDialog>()}, registry(registry_), system{system_}, audio_tab{std::make_unique<ConfigureAudio>(system_, @@ -38,7 +40,8 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, general_tab{std::make_unique<ConfigureGeneral>(system_, this)}, graphics_advanced_tab{std::make_unique<ConfigureGraphicsAdvanced>(system_, this)}, graphics_tab{std::make_unique<ConfigureGraphics>( - system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this)}, + system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, + this)}, hotkeys_tab{std::make_unique<ConfigureHotkeys>(system_.HIDCore(), this)}, input_tab{std::make_unique<ConfigureInput>(system_, this)}, network_tab{std::make_unique<ConfigureNetwork>(system_, this)}, diff --git a/src/yuzu/configuration/configure_dialog.h b/src/yuzu/configuration/configure_dialog.h index a086a07c4..2a08b7fee 100644 --- a/src/yuzu/configuration/configure_dialog.h +++ b/src/yuzu/configuration/configure_dialog.h @@ -4,7 +4,9 @@ #pragma once #include <memory> +#include <vector> #include <QDialog> +#include "yuzu/vk_device_info.h" namespace Core { class System; @@ -40,8 +42,9 @@ class ConfigureDialog : public QDialog { public: explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, - InputCommon::InputSubsystem* input_subsystem, Core::System& system_, - bool enable_web_config = true); + InputCommon::InputSubsystem* input_subsystem, + std::vector<VkDeviceInfo::Record>& vk_device_records, + Core::System& system_, bool 
enable_web_config = true); ~ConfigureDialog() override; void ApplyConfiguration(); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 431585216..a4965524a 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -1,10 +1,6 @@ // SPDX-FileCopyrightText: 2016 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -// Include this early to include Vulkan headers how we want to -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - #include <algorithm> #include <functional> #include <iosfwd> @@ -34,13 +30,11 @@ #include "common/settings.h" #include "core/core.h" #include "ui_configure_graphics.h" -#include "video_core/vulkan_common/vulkan_instance.h" -#include "video_core/vulkan_common/vulkan_library.h" -#include "video_core/vulkan_common/vulkan_surface.h" #include "yuzu/configuration/configuration_shared.h" #include "yuzu/configuration/configure_graphics.h" #include "yuzu/qt_common.h" #include "yuzu/uisettings.h" +#include "yuzu/vk_device_info.h" static const std::vector<VkPresentModeKHR> default_present_modes{VK_PRESENT_MODE_IMMEDIATE_KHR, VK_PRESENT_MODE_FIFO_KHR}; @@ -77,9 +71,10 @@ static constexpr Settings::VSyncMode PresentModeToSetting(VkPresentModeKHR mode) } ConfigureGraphics::ConfigureGraphics(const Core::System& system_, + std::vector<VkDeviceInfo::Record>& records_, const std::function<void()>& expose_compute_option_, QWidget* parent) - : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, + : QWidget(parent), ui{std::make_unique<Ui::ConfigureGraphics>()}, records{records_}, expose_compute_option{expose_compute_option_}, system{system_} { vulkan_device = Settings::values.vulkan_device.GetValue(); RetrieveVulkanDevices(); @@ -504,47 +499,19 @@ void ConfigureGraphics::UpdateAPILayout() { } } -void ConfigureGraphics::RetrieveVulkanDevices() try { - if 
(UISettings::values.has_broken_vulkan) { - return; - } - - using namespace Vulkan; - - auto* window = this->window()->windowHandle(); - auto wsi = QtCommon::GetWindowSystemInfo(window); - - vk::InstanceDispatch dld; - const auto library = OpenLibrary(); - const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type); - const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); - vk::SurfaceKHR surface = CreateSurface(instance, wsi); - +void ConfigureGraphics::RetrieveVulkanDevices() { vulkan_devices.clear(); - vulkan_devices.reserve(physical_devices.size()); + vulkan_devices.reserve(records.size()); device_present_modes.clear(); - device_present_modes.reserve(physical_devices.size()); - for (const VkPhysicalDevice device : physical_devices) { - const auto physical_device = vk::PhysicalDevice(device, dld); - const std::string name = physical_device.GetProperties().deviceName; - const std::vector<VkPresentModeKHR> present_modes = - physical_device.GetSurfacePresentModesKHR(*surface); - vulkan_devices.push_back(QString::fromStdString(name)); - device_present_modes.push_back(present_modes); - - VkPhysicalDeviceDriverProperties driver_properties{}; - driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; - driver_properties.pNext = nullptr; - VkPhysicalDeviceProperties2 properties{}; - properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - properties.pNext = &driver_properties; - dld.vkGetPhysicalDeviceProperties2(physical_device, &properties); - if (driver_properties.driverID == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + device_present_modes.reserve(records.size()); + for (const auto& record : records) { + vulkan_devices.push_back(QString::fromStdString(record.name)); + device_present_modes.push_back(record.vsync_support); + + if (record.has_broken_compute) { expose_compute_option(); } } -} catch (const Vulkan::vk::Exception& exception) { - LOG_ERROR(Frontend, 
"Failed to enumerate devices with error: {}", exception.what()); } Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 364b1cac2..be9310b74 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -12,6 +12,7 @@ #include <qobjectdefs.h> #include <vulkan/vulkan_core.h> #include "common/common_types.h" +#include "vk_device_info.h" class QEvent; class QObject; @@ -39,6 +40,7 @@ class ConfigureGraphics : public QWidget { public: explicit ConfigureGraphics(const Core::System& system_, + std::vector<VkDeviceInfo::Record>& records, const std::function<void()>& expose_compute_option_, QWidget* parent = nullptr); ~ConfigureGraphics() override; @@ -77,6 +79,7 @@ private: ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_asynchronous_gpu_emulation; + std::vector<VkDeviceInfo::Record>& records; std::vector<QString> vulkan_devices; std::vector<std::vector<VkPresentModeKHR>> device_present_modes; std::vector<VkPresentModeKHR> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 0463ac8b9..c0a044767 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -43,6 +43,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->enable_compute_pipelines_checkbox->setChecked( Settings::values.enable_compute_pipelines.GetValue()); ui->use_video_framerate_checkbox->setChecked(Settings::values.use_video_framerate.GetValue()); + ui->barrier_feedback_loops_checkbox->setChecked( + Settings::values.barrier_feedback_loops.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setCurrentIndex( @@ -94,6 +96,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { enable_compute_pipelines); 
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_video_framerate, ui->use_video_framerate_checkbox, use_video_framerate); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.barrier_feedback_loops, + ui->barrier_feedback_loops_checkbox, + barrier_feedback_loops); } void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { @@ -130,6 +135,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { Settings::values.enable_compute_pipelines.UsingGlobal()); ui->use_video_framerate_checkbox->setEnabled( Settings::values.use_video_framerate.UsingGlobal()); + ui->barrier_feedback_loops_checkbox->setEnabled( + Settings::values.barrier_feedback_loops.UsingGlobal()); return; } @@ -157,6 +164,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->use_video_framerate_checkbox, Settings::values.use_video_framerate, use_video_framerate); + ConfigurationShared::SetColoredTristate(ui->barrier_feedback_loops_checkbox, + Settings::values.barrier_feedback_loops, + barrier_feedback_loops); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index a4dc8ceb0..369a7c83e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -48,6 +48,7 @@ private: ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; ConfigurationShared::CheckState enable_compute_pipelines; ConfigurationShared::CheckState use_video_framerate; + ConfigurationShared::CheckState barrier_feedback_loops; const Core::System& system; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index e7f0ef6be..d527a6f38 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ 
b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -202,6 +202,16 @@ Compute pipelines are always enabled on all other drivers.</string> </widget> </item> <item> + <widget class="QCheckBox" name="barrier_feedback_loops_checkbox"> + <property name="toolTip"> + <string>Improves rendering of transparency effects in specific games.</string> + </property> + <property name="text"> + <string>Barrier feedback loops</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="af_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/configuration/configure_per_game.cpp b/src/yuzu/configuration/configure_per_game.cpp index 7ac162586..eb96e6068 100644 --- a/src/yuzu/configuration/configure_per_game.cpp +++ b/src/yuzu/configuration/configure_per_game.cpp @@ -6,6 +6,7 @@ #include <memory> #include <string> #include <utility> +#include <vector> #include <fmt/format.h> @@ -34,8 +35,10 @@ #include "yuzu/configuration/configure_system.h" #include "yuzu/uisettings.h" #include "yuzu/util/util.h" +#include "yuzu/vk_device_info.h" ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name, + std::vector<VkDeviceInfo::Record>& vk_device_records, Core::System& system_) : QDialog(parent), ui(std::make_unique<Ui::ConfigurePerGame>()), title_id{title_id_}, system{system_} { @@ -50,7 +53,7 @@ ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const std::st general_tab = std::make_unique<ConfigureGeneral>(system_, this); graphics_advanced_tab = std::make_unique<ConfigureGraphicsAdvanced>(system_, this); graphics_tab = std::make_unique<ConfigureGraphics>( - system_, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this); + system_, vk_device_records, [&]() { graphics_advanced_tab->ExposeComputeOption(); }, this); input_tab = std::make_unique<ConfigureInputPerGame>(system_, game_config.get(), this); system_tab = 
std::make_unique<ConfigureSystem>(system_, this); diff --git a/src/yuzu/configuration/configure_per_game.h b/src/yuzu/configuration/configure_per_game.h index 85752f1fa..7ec1ded06 100644 --- a/src/yuzu/configuration/configure_per_game.h +++ b/src/yuzu/configuration/configure_per_game.h @@ -5,11 +5,13 @@ #include <memory> #include <string> +#include <vector> #include <QDialog> #include <QList> #include "core/file_sys/vfs_types.h" +#include "vk_device_info.h" #include "yuzu/configuration/config.h" namespace Core { @@ -45,6 +47,7 @@ class ConfigurePerGame : public QDialog { public: // Cannot use std::filesystem::path due to https://bugreports.qt.io/browse/QTBUG-73263 explicit ConfigurePerGame(QWidget* parent, u64 title_id_, const std::string& file_name, + std::vector<VkDeviceInfo::Record>& vk_device_records, Core::System& system_); ~ConfigurePerGame() override; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index cba7c3cce..45a39451d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -147,6 +147,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "yuzu/startup_checks.h" #include "yuzu/uisettings.h" #include "yuzu/util/clickable_label.h" +#include "yuzu/vk_device_info.h" #ifdef YUZU_DBGHELP #include "yuzu/mini_dump.h" @@ -440,6 +441,8 @@ GMainWindow::GMainWindow(std::unique_ptr<Config> config_, bool has_broken_vulkan renderer_status_button->setDisabled(true); renderer_status_button->setChecked(false); + } else { + VkDeviceInfo::PopulateRecords(vk_device_records, this->window()->windowHandle()); } #if defined(HAVE_SDL2) && !defined(_WIN32) @@ -3494,7 +3497,8 @@ void GMainWindow::OnConfigure() { const auto old_language_index = Settings::values.language_index.GetValue(); Settings::SetConfiguringGlobal(true); - ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), *system, + ConfigureDialog configure_dialog(this, hotkey_registry, input_subsystem.get(), + vk_device_records, *system, 
!multiplayer_state->IsHostingPublicRoom()); connect(&configure_dialog, &ConfigureDialog::LanguageChanged, this, &GMainWindow::OnLanguageChanged); @@ -3765,7 +3769,7 @@ void GMainWindow::OpenPerGameConfiguration(u64 title_id, const std::string& file const auto v_file = Core::GetGameFileFromPath(vfs, file_name); Settings::SetConfiguringGlobal(false); - ConfigurePerGame dialog(this, title_id, file_name, *system); + ConfigurePerGame dialog(this, title_id, file_name, vk_device_records, *system); dialog.LoadFromFile(v_file); const auto result = dialog.exec(); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 6bb70972f..e0e775d87 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -118,6 +118,10 @@ enum class ReinitializeKeyBehavior { Warning, }; +namespace VkDeviceInfo { +class Record; +} + class GMainWindow : public QMainWindow { Q_OBJECT @@ -418,6 +422,8 @@ private: GameListPlaceholder* game_list_placeholder; + std::vector<VkDeviceInfo::Record> vk_device_records; + // Status bar elements QLabel* message_label = nullptr; QLabel* shader_building_label = nullptr; diff --git a/src/yuzu/vk_device_info.cpp b/src/yuzu/vk_device_info.cpp new file mode 100644 index 000000000..7c26a3dc7 --- /dev/null +++ b/src/yuzu/vk_device_info.cpp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <utility> +#include <vector> +#include "common/dynamic_library.h" +#include "common/logging/log.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_instance.h" +#include "video_core/vulkan_common/vulkan_library.h" +#include "video_core/vulkan_common/vulkan_surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" +#include "vulkan/vulkan_core.h" +#include "yuzu/qt_common.h" +#include "yuzu/vk_device_info.h" + +class QWindow; + +namespace VkDeviceInfo { +Record::Record(std::string_view name_, const std::vector<VkPresentModeKHR>& vsync_modes_, + bool 
has_broken_compute_) + : name{name_}, vsync_support{vsync_modes_}, has_broken_compute{has_broken_compute_} {} + +Record::~Record() = default; + +void PopulateRecords(std::vector<Record>& records, QWindow* window) try { + using namespace Vulkan; + + auto wsi = QtCommon::GetWindowSystemInfo(window); + + vk::InstanceDispatch dld; + const auto library = OpenLibrary(); + const vk::Instance instance = CreateInstance(*library, dld, VK_API_VERSION_1_1, wsi.type); + const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); + vk::SurfaceKHR surface = CreateSurface(instance, wsi); + + records.clear(); + records.reserve(physical_devices.size()); + for (const VkPhysicalDevice device : physical_devices) { + const auto physical_device = vk::PhysicalDevice(device, dld); + const std::string name = physical_device.GetProperties().deviceName; + const std::vector<VkPresentModeKHR> present_modes = + physical_device.GetSurfacePresentModesKHR(*surface); + + VkPhysicalDeviceDriverProperties driver_properties{}; + driver_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + driver_properties.pNext = nullptr; + VkPhysicalDeviceProperties2 properties{}; + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties.pNext = &driver_properties; + dld.vkGetPhysicalDeviceProperties2(physical_device, &properties); + + bool has_broken_compute{Vulkan::Device::CheckBrokenCompute( + driver_properties.driverID, properties.properties.driverVersion)}; + + records.push_back(VkDeviceInfo::Record(name, present_modes, has_broken_compute)); + } +} catch (const Vulkan::vk::Exception& exception) { + LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); +} +} // namespace VkDeviceInfo diff --git a/src/yuzu/vk_device_info.h b/src/yuzu/vk_device_info.h new file mode 100644 index 000000000..bda8262f4 --- /dev/null +++ b/src/yuzu/vk_device_info.h @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2023 yuzu Emulator 
Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <algorithm> +#include <iterator> +#include <memory> +#include <string> +#include <string_view> +#include <vector> +#include "common/common_types.h" +#include "vulkan/vulkan_core.h" + +class QWindow; + +namespace Settings { +enum class VSyncMode : u32; +} +// #include "common/settings.h" + +namespace VkDeviceInfo { +// Short class to record Vulkan driver information for configuration purposes +class Record { +public: + explicit Record(std::string_view name, const std::vector<VkPresentModeKHR>& vsync_modes, + bool has_broken_compute); + ~Record(); + + const std::string name; + const std::vector<VkPresentModeKHR> vsync_support; + const bool has_broken_compute; +}; + +void PopulateRecords(std::vector<Record>& records, QWindow* window); +} // namespace VkDeviceInfo |