diff options
| author | Ameer <aj662@drexel.edu> | 2020-07-04 00:59:40 -0400 | 
|---|---|---|
| committer | Ameer <aj662@drexel.edu> | 2020-07-04 00:59:40 -0400 | 
| commit | f829932ed191ad469df01342191bf2725e8a20bb (patch) | |
| tree | 0ae185ce3ef43ef9b085aae7b9ad5abb04e3d239 | |
| parent | d00972fce1fe5f2eb13c7e5d7e4e56036cb6bc91 (diff) | |
| parent | 3096adb3471af1b094d670751e476c337007d299 (diff) | |
Fix merge conflicts?
237 files changed, 7778 insertions, 2726 deletions
diff --git a/.gitmodules b/.gitmodules index e3ec628ea..79028bbb5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -37,3 +37,6 @@  [submodule "externals/libusb"]  	path = externals/libusb  	url = https://github.com/ameerj/libusb +[submodule "opus"] +	path = externals/opus/opus +	url = https://github.com/xiph/opus.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 967968226..27383bce8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,8 +156,6 @@ macro(yuzu_find_packages)          #"libzip            1.5         libzip/1.5.2@bincrafters/stable"          "lz4               1.8         lz4/1.9.2"          "nlohmann_json     3.7         nlohmann_json/3.7.3" -    # we need to be careful as the version check might be broken https://github.com/xiph/opus/issues/110 -        "opus              1.3         opus/1.3.1"          "ZLIB              1.2         zlib/1.2.11"          "zstd              1.4         zstd/1.4.4"      ) @@ -214,6 +212,9 @@ if(ENABLE_QT)          set(QT_PREFIX_HINT HINTS "${QT_PREFIX}")      endif()      find_package(Qt5 5.9 COMPONENTS Widgets OpenGL ${QT_PREFIX_HINT}) +    if (YUZU_USE_QT_WEB_ENGINE) +        find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets) +    endif()      if (NOT Qt5_FOUND)          list(APPEND CONAN_REQUIRED_LIBS "qt/5.14.1@bincrafters/stable")      endif() diff --git a/dist/yuzu.manifest b/dist/yuzu.manifest index fd30b656f..038edff23 100644 --- a/dist/yuzu.manifest +++ b/dist/yuzu.manifest @@ -1,24 +1,58 @@  <?xml version="1.0" encoding="UTF-8" standalone="yes"?> -<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0"> - <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3"> -  <security> -   <requestedPrivileges> -    <requestedExecutionLevel level="asInvoker" uiAccess="false"/> -   </requestedPrivileges> -  </security> - </trustInfo> - <application xmlns="urn:schemas-microsoft-com:asm.v3"> -  <windowsSettings> -   <dpiAware 
xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">True/PM</dpiAware> -   <longPathAware xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">true</longPathAware> -  </windowsSettings> - </application> - <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1"> -  <application> -   <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> -   <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> -   <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> -   <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> -  </application> - </compatibility> -</assembly>
\ No newline at end of file +<assembly manifestVersion="1.0" +    xmlns="urn:schemas-microsoft-com:asm.v1" +    xmlns:asmv3="urn:schemas-microsoft-com:asm.v3"> +  <asmv3:application> +    <asmv3:windowsSettings> +      <!-- Windows 7/8/8.1/10 --> +      <dpiAware +        xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings"> +        true/pm +      </dpiAware> +      <!-- Windows 10, version 1607 or later --> +      <dpiAwareness +        xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings"> +        PerMonitorV2 +      </dpiAwareness> +      <!-- Windows 10, version 1703 or later --> +      <gdiScaling +          xmlns="http://schemas.microsoft.com/SMI/2017/WindowsSettings"> +        true +      </gdiScaling> +      <ws2:longPathAware +          xmlns:ws3="http://schemas.microsoft.com/SMI/2016/WindowsSettings"> +        true +      </ws2:longPathAware> +    </asmv3:windowsSettings> +  </asmv3:application> +  <compatibility +      xmlns="urn:schemas-microsoft-com:compatibility.v1"> +    <application> +      <!-- Windows 10 --> +      <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> +      <!-- Windows 8.1 --> +      <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> +      <!-- Windows 8 --> +      <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> +      <!-- Windows 7 --> +      <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> +    </application> +  </compatibility> +  <trustInfo +      xmlns="urn:schemas-microsoft-com:asm.v3"> +    <security> +      <requestedPrivileges> +        <!-- +          UAC settings: +          - app should run at same integrity level as calling process +          - app does not need to manipulate windows belonging to +            higher-integrity-level processes +          --> +        <requestedExecutionLevel +            level="asInvoker" +            uiAccess="false" +        /> +      </requestedPrivileges> +    </security> +  </trustInfo> +</assembly> diff --git 
a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b80b27605..d1dcc403b 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -91,3 +91,6 @@ if (ENABLE_WEB_SERVICE)      target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)      target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})  endif() + +# Opus +add_subdirectory(opus) diff --git a/externals/dynarmic b/externals/dynarmic -Subproject e7166e8ba74d7b9c85e87afc0aaf667e7e84cfe +Subproject 4f967387c07365b7ea35d2fa3e19b7df8872a09 diff --git a/externals/opus/CMakeLists.txt b/externals/opus/CMakeLists.txt new file mode 100644 index 000000000..94a86551f --- /dev/null +++ b/externals/opus/CMakeLists.txt @@ -0,0 +1,254 @@ +cmake_minimum_required(VERSION 3.8) + +project(opus) + +option(OPUS_STACK_PROTECTOR "Use stack protection" OFF) +option(OPUS_USE_ALLOCA "Use alloca for stack arrays (on non-C99 compilers)" OFF) +option(OPUS_CUSTOM_MODES "Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames" OFF) +option(OPUS_FIXED_POINT "Compile as fixed-point (for machines without a fast enough FPU)" OFF) +option(OPUS_ENABLE_FLOAT_API "Compile with the floating point API (for machines with float library" ON) + +include(opus/opus_functions.cmake) + +if(OPUS_STACK_PROTECTOR) +    if(NOT MSVC) # GC on by default on MSVC +        check_and_set_flag(STACK_PROTECTION_STRONG -fstack-protector-strong) +    endif() +else() +    if(MSVC) +        check_and_set_flag(BUFFER_SECURITY_CHECK /GS-) +    endif() +endif() + +add_library(opus STATIC +    # CELT sources +    opus/celt/bands.c +    opus/celt/celt.c +    opus/celt/celt_decoder.c +    opus/celt/celt_encoder.c +    opus/celt/celt_lpc.c +    opus/celt/cwrs.c +    opus/celt/entcode.c +    opus/celt/entdec.c +    opus/celt/entenc.c +    opus/celt/kiss_fft.c +    opus/celt/laplace.c +    opus/celt/mathops.c +    opus/celt/mdct.c +    opus/celt/modes.c +    opus/celt/pitch.c +    opus/celt/quant_bands.c +    opus/celt/rate.c +    opus/celt/vq.c 
+ +    # SILK sources +    opus/silk/A2NLSF.c +    opus/silk/CNG.c +    opus/silk/HP_variable_cutoff.c +    opus/silk/LPC_analysis_filter.c +    opus/silk/LPC_fit.c +    opus/silk/LPC_inv_pred_gain.c +    opus/silk/LP_variable_cutoff.c +    opus/silk/NLSF2A.c +    opus/silk/NLSF_VQ.c +    opus/silk/NLSF_VQ_weights_laroia.c +    opus/silk/NLSF_decode.c +    opus/silk/NLSF_del_dec_quant.c +    opus/silk/NLSF_encode.c +    opus/silk/NLSF_stabilize.c +    opus/silk/NLSF_unpack.c +    opus/silk/NSQ.c +    opus/silk/NSQ_del_dec.c +    opus/silk/PLC.c +    opus/silk/VAD.c +    opus/silk/VQ_WMat_EC.c +    opus/silk/ana_filt_bank_1.c +    opus/silk/biquad_alt.c +    opus/silk/bwexpander.c +    opus/silk/bwexpander_32.c +    opus/silk/check_control_input.c +    opus/silk/code_signs.c +    opus/silk/control_SNR.c +    opus/silk/control_audio_bandwidth.c +    opus/silk/control_codec.c +    opus/silk/dec_API.c +    opus/silk/decode_core.c +    opus/silk/decode_frame.c +    opus/silk/decode_indices.c +    opus/silk/decode_parameters.c +    opus/silk/decode_pitch.c +    opus/silk/decode_pulses.c +    opus/silk/decoder_set_fs.c +    opus/silk/enc_API.c +    opus/silk/encode_indices.c +    opus/silk/encode_pulses.c +    opus/silk/gain_quant.c +    opus/silk/init_decoder.c +    opus/silk/init_encoder.c +    opus/silk/inner_prod_aligned.c +    opus/silk/interpolate.c +    opus/silk/lin2log.c +    opus/silk/log2lin.c +    opus/silk/pitch_est_tables.c +    opus/silk/process_NLSFs.c +    opus/silk/quant_LTP_gains.c +    opus/silk/resampler.c +    opus/silk/resampler_down2.c +    opus/silk/resampler_down2_3.c +    opus/silk/resampler_private_AR2.c +    opus/silk/resampler_private_IIR_FIR.c +    opus/silk/resampler_private_down_FIR.c +    opus/silk/resampler_private_up2_HQ.c +    opus/silk/resampler_rom.c +    opus/silk/shell_coder.c +    opus/silk/sigm_Q15.c +    opus/silk/sort.c +    opus/silk/stereo_LR_to_MS.c +    opus/silk/stereo_MS_to_LR.c +    opus/silk/stereo_decode_pred.c +    
opus/silk/stereo_encode_pred.c +    opus/silk/stereo_find_predictor.c +    opus/silk/stereo_quant_pred.c +    opus/silk/sum_sqr_shift.c +    opus/silk/table_LSF_cos.c +    opus/silk/tables_LTP.c +    opus/silk/tables_NLSF_CB_NB_MB.c +    opus/silk/tables_NLSF_CB_WB.c +    opus/silk/tables_gain.c +    opus/silk/tables_other.c +    opus/silk/tables_pitch_lag.c +    opus/silk/tables_pulses_per_block.c + +    # Opus sources +    opus/src/analysis.c +    opus/src/mapping_matrix.c +    opus/src/mlp.c +    opus/src/mlp_data.c +    opus/src/opus.c +    opus/src/opus_decoder.c +    opus/src/opus_encoder.c +    opus/src/opus_multistream.c +    opus/src/opus_multistream_decoder.c +    opus/src/opus_multistream_encoder.c +    opus/src/opus_projection_decoder.c +    opus/src/opus_projection_encoder.c +    opus/src/repacketizer.c +) + +if (DEBUG) +    target_sources(opus PRIVATE opus/silk/debug.c) +endif() + +if (OPUS_FIXED_POINT) +    target_sources(opus PRIVATE +        opus/silk/fixed/LTP_analysis_filter_FIX.c +        opus/silk/fixed/LTP_scale_ctrl_FIX.c +        opus/silk/fixed/apply_sine_window_FIX.c +        opus/silk/fixed/autocorr_FIX.c +        opus/silk/fixed/burg_modified_FIX.c +        opus/silk/fixed/corrMatrix_FIX.c +        opus/silk/fixed/encode_frame_FIX.c +        opus/silk/fixed/find_LPC_FIX.c +        opus/silk/fixed/find_LTP_FIX.c +        opus/silk/fixed/find_pitch_lags_FIX.c +        opus/silk/fixed/find_pred_coefs_FIX.c +        opus/silk/fixed/k2a_FIX.c +        opus/silk/fixed/k2a_Q16_FIX.c +        opus/silk/fixed/noise_shape_analysis_FIX.c +        opus/silk/fixed/pitch_analysis_core_FIX.c +        opus/silk/fixed/prefilter_FIX.c +        opus/silk/fixed/process_gains_FIX.c +        opus/silk/fixed/regularize_correlations_FIX.c +        opus/silk/fixed/residual_energy16_FIX.c +        opus/silk/fixed/residual_energy_FIX.c +        opus/silk/fixed/schur64_FIX.c +        opus/silk/fixed/schur_FIX.c +        opus/silk/fixed/solve_LS_FIX.c +        
opus/silk/fixed/vector_ops_FIX.c +        opus/silk/fixed/warped_autocorrelation_FIX.c +    ) +else() +    target_sources(opus PRIVATE +        opus/silk/float/LPC_analysis_filter_FLP.c +        opus/silk/float/LPC_inv_pred_gain_FLP.c +        opus/silk/float/LTP_analysis_filter_FLP.c +        opus/silk/float/LTP_scale_ctrl_FLP.c +        opus/silk/float/apply_sine_window_FLP.c +        opus/silk/float/autocorrelation_FLP.c +        opus/silk/float/burg_modified_FLP.c +        opus/silk/float/bwexpander_FLP.c +        opus/silk/float/corrMatrix_FLP.c +        opus/silk/float/encode_frame_FLP.c +        opus/silk/float/energy_FLP.c +        opus/silk/float/find_LPC_FLP.c +        opus/silk/float/find_LTP_FLP.c +        opus/silk/float/find_pitch_lags_FLP.c +        opus/silk/float/find_pred_coefs_FLP.c +        opus/silk/float/inner_product_FLP.c +        opus/silk/float/k2a_FLP.c +        opus/silk/float/noise_shape_analysis_FLP.c +        opus/silk/float/pitch_analysis_core_FLP.c +        opus/silk/float/process_gains_FLP.c +        opus/silk/float/regularize_correlations_FLP.c +        opus/silk/float/residual_energy_FLP.c +        opus/silk/float/scale_copy_vector_FLP.c +        opus/silk/float/scale_vector_FLP.c +        opus/silk/float/schur_FLP.c +        opus/silk/float/sort_FLP.c +        opus/silk/float/warped_autocorrelation_FLP.c +        opus/silk/float/wrappers_FLP.c +    ) +endif() + +target_compile_definitions(opus PRIVATE OPUS_BUILD ENABLE_HARDENING) + +if(NOT MSVC) +    if(MINGW) +        target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=0) +    else() +        target_compile_definitions(opus PRIVATE _FORTIFY_SOURCE=2) +    endif() +endif() + +# It is strongly recommended to uncomment one of these VAR_ARRAYS: Use C99 +# variable-length arrays for stack allocation USE_ALLOCA: Use alloca() for stack +# allocation If none is defined, then the fallback is a non-threadsafe global +# array +if(OPUS_USE_ALLOCA OR MSVC) +    
target_compile_definitions(opus PRIVATE USE_ALLOCA) +else() +    target_compile_definitions(opus PRIVATE VAR_ARRAYS) +endif() + +if(OPUS_CUSTOM_MODES) +    target_compile_definitions(opus PRIVATE CUSTOM_MODES) +endif() + +if(NOT OPUS_ENABLE_FLOAT_API) +    target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API) +endif() + +target_compile_definitions(opus +PUBLIC +    -DOPUS_VERSION="\\"1.3.1\\"" + +PRIVATE +    # Use C99 intrinsics to speed up float-to-int conversion +    HAVE_LRINTF +) + +if (FIXED_POINT) +    target_compile_definitions(opus PRIVATE -DFIXED_POINT=1 -DDISABLE_FLOAT_API) +endif() + +target_include_directories(opus +PUBLIC +    opus/include + +PRIVATE +    opus/celt +    opus/silk +    opus/silk/fixed +    opus/silk/float +    opus/src +) diff --git a/externals/opus/opus b/externals/opus/opus new file mode 160000 +Subproject ad8fe90db79b7d2a135e3dfd2ed6631b0c5662a diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3a57356ab..1e977e8a8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,10 @@ else()          -Wno-unused-parameter      ) +    if (ARCHITECTURE_x86_64) +        add_compile_options("-mcx16") +    endif() +      if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)          add_compile_options("-stdlib=libc++")      endif() diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 4ca98f8ea..dfc4805d9 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp @@ -59,15 +59,24 @@ Stream::State Stream::GetState() const {      return state;  } -s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const { +s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const {      const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; -    const auto us = -        std::chrono::microseconds((static_cast<u64>(num_samples) * 1000000) / sample_rate); -    return Core::Timing::usToCycles(us); +    const auto ns = +        std::chrono::nanoseconds((static_cast<u64>(num_samples) * 
1000000000ULL) / sample_rate); +    return ns.count(); +} + +s64 Stream::GetBufferReleaseNSHostTiming(const Buffer& buffer) const { +    const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()}; +    /// DSP signals before playing the last sample, in HLE we emulate this in this way +    s64 base_samples = std::max<s64>(static_cast<s64>(num_samples) - 1, 0); +    const auto ns = +        std::chrono::nanoseconds((static_cast<u64>(base_samples) * 1000000000ULL) / sample_rate); +    return ns.count();  }  static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { -    const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)}; +    const float volume{std::clamp(Settings::Volume() - (1.0f - game_volume), 0.0f, 1.0f)};      if (volume == 1.0f) {          return; @@ -105,7 +114,11 @@ void Stream::PlayNextBuffer() {      sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); -    core_timing.ScheduleEvent(GetBufferReleaseCycles(*active_buffer), release_event, {}); +    if (core_timing.IsHostTiming()) { +        core_timing.ScheduleEvent(GetBufferReleaseNSHostTiming(*active_buffer), release_event, {}); +    } else { +        core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {}); +    }  }  void Stream::ReleaseActiveBuffer() { diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index 1708a4d98..e309d60fe 100644 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h @@ -96,7 +96,10 @@ private:      void ReleaseActiveBuffer();      /// Gets the number of core cycles when the specified buffer will be released -    s64 GetBufferReleaseCycles(const Buffer& buffer) const; +    s64 GetBufferReleaseNS(const Buffer& buffer) const; + +    /// Gets the number of core cycles when the specified buffer will be released +    s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const;      u32 sample_rate;                  ///< Sample rate of the stream  
    Format format;                    ///< Format of the stream diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 0a3e2f4d1..d120c8d3d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(common STATIC      algorithm.h      alignment.h      assert.h +    atomic_ops.cpp +    atomic_ops.h      detached_tasks.cpp      detached_tasks.h      bit_field.h @@ -110,6 +112,8 @@ add_library(common STATIC      common_types.h      dynamic_library.cpp      dynamic_library.h +    fiber.cpp +    fiber.h      file_util.cpp      file_util.h      hash.h @@ -143,6 +147,8 @@ add_library(common STATIC      scm_rev.cpp      scm_rev.h      scope_exit.h +    spin_lock.cpp +    spin_lock.h      string_util.cpp      string_util.h      swap.h @@ -163,6 +169,8 @@ add_library(common STATIC      vector_math.h      virtual_buffer.cpp      virtual_buffer.h +    wall_clock.cpp +    wall_clock.h      web_result.h      zstd_compression.cpp      zstd_compression.h @@ -173,12 +181,15 @@ if(ARCHITECTURE_x86_64)          PRIVATE              x64/cpu_detect.cpp              x64/cpu_detect.h +            x64/native_clock.cpp +            x64/native_clock.h              x64/xbyak_abi.h              x64/xbyak_util.h      )  endif()  create_target_directory_groups(common) +find_package(Boost 1.71 COMPONENTS context headers REQUIRED) -target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) +target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)  target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp new file mode 100644 index 000000000..1098e21ff --- /dev/null +++ b/src/common/atomic_ops.cpp @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <cstring> + +#include "common/atomic_ops.h" + +#if _MSC_VER +#include <intrin.h> +#endif + +namespace Common { + +#if _MSC_VER + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { +    u8 result = _InterlockedCompareExchange8((char*)pointer, value, expected); +    return result == expected; +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { +    u16 result = _InterlockedCompareExchange16((short*)pointer, value, expected); +    return result == expected; +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { +    u32 result = _InterlockedCompareExchange((long*)pointer, value, expected); +    return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { +    u64 result = _InterlockedCompareExchange64((__int64*)pointer, value, expected); +    return result == expected; +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { +    return _InterlockedCompareExchange128((__int64*)pointer, value[1], value[0], +                                          (__int64*)expected.data()) != 0; +} + +#else + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) { +    return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) { +    return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) { +    return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) { +    return __sync_bool_compare_and_swap(pointer, expected, value); +} + +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) { +    unsigned __int128 value_a; +    unsigned __int128 expected_a; +    std::memcpy(&value_a, value.data(), sizeof(u128)); +    
std::memcpy(&expected_a, expected.data(), sizeof(u128)); +    return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a); +} + +#endif + +} // namespace Common diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h new file mode 100644 index 000000000..e6181d521 --- /dev/null +++ b/src/common/atomic_ops.h @@ -0,0 +1,17 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Common { + +bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected); +bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected); +bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected); +bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected); + +} // namespace Common diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp new file mode 100644 index 000000000..1c1d09ccb --- /dev/null +++ b/src/common/fiber.cpp @@ -0,0 +1,222 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "common/fiber.h" +#if defined(_WIN32) || defined(WIN32) +#include <windows.h> +#else +#include <boost/context/detail/fcontext.hpp> +#endif + +namespace Common { + +constexpr std::size_t default_stack_size = 256 * 1024; // 256kb + +#if defined(_WIN32) || defined(WIN32) + +struct Fiber::FiberImpl { +    LPVOID handle = nullptr; +    LPVOID rewind_handle = nullptr; +}; + +void Fiber::Start() { +    ASSERT(previous_fiber != nullptr); +    previous_fiber->guard.unlock(); +    previous_fiber.reset(); +    entry_point(start_parameter); +    UNREACHABLE(); +} + +void Fiber::OnRewind() { +    ASSERT(impl->handle != nullptr); +    DeleteFiber(impl->handle); +    impl->handle = impl->rewind_handle; +    impl->rewind_handle = nullptr; +    rewind_point(rewind_parameter); +    UNREACHABLE(); +} + +void Fiber::FiberStartFunc(void* fiber_parameter) { +    auto fiber = static_cast<Fiber*>(fiber_parameter); +    fiber->Start(); +} + +void Fiber::RewindStartFunc(void* fiber_parameter) { +    auto fiber = static_cast<Fiber*>(fiber_parameter); +    fiber->OnRewind(); +} + +Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter) +    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { +    impl = std::make_unique<FiberImpl>(); +    impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this); +} + +Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {} + +Fiber::~Fiber() { +    if (released) { +        return; +    } +    // Make sure the Fiber is not being used +    const bool locked = guard.try_lock(); +    ASSERT_MSG(locked, "Destroying a fiber that's still running"); +    if (locked) { +        guard.unlock(); +    } +    DeleteFiber(impl->handle); +} + +void Fiber::Exit() { +    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); +    if (!is_thread_fiber) { +        return; +    } +    ConvertFiberToThread(); +    guard.unlock(); +    released = true; +} + +void 
Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { +    rewind_point = std::move(rewind_func); +    rewind_parameter = start_parameter; +} + +void Fiber::Rewind() { +    ASSERT(rewind_point); +    ASSERT(impl->rewind_handle == nullptr); +    impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this); +    SwitchToFiber(impl->rewind_handle); +} + +void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { +    ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); +    ASSERT_MSG(to != nullptr, "Next fiber is null!"); +    to->guard.lock(); +    to->previous_fiber = from; +    SwitchToFiber(to->impl->handle); +    ASSERT(from->previous_fiber != nullptr); +    from->previous_fiber->guard.unlock(); +    from->previous_fiber.reset(); +} + +std::shared_ptr<Fiber> Fiber::ThreadToFiber() { +    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()}; +    fiber->guard.lock(); +    fiber->impl->handle = ConvertThreadToFiber(nullptr); +    fiber->is_thread_fiber = true; +    return fiber; +} + +#else + +struct Fiber::FiberImpl { +    alignas(64) std::array<u8, default_stack_size> stack; +    alignas(64) std::array<u8, default_stack_size> rewind_stack; +    u8* stack_limit; +    u8* rewind_stack_limit; +    boost::context::detail::fcontext_t context; +    boost::context::detail::fcontext_t rewind_context; +}; + +void Fiber::Start(boost::context::detail::transfer_t& transfer) { +    ASSERT(previous_fiber != nullptr); +    previous_fiber->impl->context = transfer.fctx; +    previous_fiber->guard.unlock(); +    previous_fiber.reset(); +    entry_point(start_parameter); +    UNREACHABLE(); +} + +void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) { +    ASSERT(impl->context != nullptr); +    impl->context = impl->rewind_context; +    impl->rewind_context = nullptr; +    u8* tmp = impl->stack_limit; +    impl->stack_limit = impl->rewind_stack_limit; +    
impl->rewind_stack_limit = tmp; +    rewind_point(rewind_parameter); +    UNREACHABLE(); +} + +void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { +    auto fiber = static_cast<Fiber*>(transfer.data); +    fiber->Start(transfer); +} + +void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { +    auto fiber = static_cast<Fiber*>(transfer.data); +    fiber->OnRewind(transfer); +} + +Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter) +    : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} { +    impl = std::make_unique<FiberImpl>(); +    impl->stack_limit = impl->stack.data(); +    impl->rewind_stack_limit = impl->rewind_stack.data(); +    u8* stack_base = impl->stack_limit + default_stack_size; +    impl->context = +        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); +} + +void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) { +    rewind_point = std::move(rewind_func); +    rewind_parameter = start_parameter; +} + +Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {} + +Fiber::~Fiber() { +    if (released) { +        return; +    } +    // Make sure the Fiber is not being used +    const bool locked = guard.try_lock(); +    ASSERT_MSG(locked, "Destroying a fiber that's still running"); +    if (locked) { +        guard.unlock(); +    } +} + +void Fiber::Exit() { + +    ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber"); +    if (!is_thread_fiber) { +        return; +    } +    guard.unlock(); +    released = true; +} + +void Fiber::Rewind() { +    ASSERT(rewind_point); +    ASSERT(impl->rewind_context == nullptr); +    u8* stack_base = impl->rewind_stack_limit + default_stack_size; +    impl->rewind_context = +        boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc); +    boost::context::detail::jump_fcontext(impl->rewind_context, this); 
+} + +void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) { +    ASSERT_MSG(from != nullptr, "Yielding fiber is null!"); +    ASSERT_MSG(to != nullptr, "Next fiber is null!"); +    to->guard.lock(); +    to->previous_fiber = from; +    auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get()); +    ASSERT(from->previous_fiber != nullptr); +    from->previous_fiber->impl->context = transfer.fctx; +    from->previous_fiber->guard.unlock(); +    from->previous_fiber.reset(); +} + +std::shared_ptr<Fiber> Fiber::ThreadToFiber() { +    std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()}; +    fiber->guard.lock(); +    fiber->is_thread_fiber = true; +    return fiber; +} + +#endif +} // namespace Common diff --git a/src/common/fiber.h b/src/common/fiber.h new file mode 100644 index 000000000..dafc1100e --- /dev/null +++ b/src/common/fiber.h @@ -0,0 +1,92 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <memory> + +#include "common/common_types.h" +#include "common/spin_lock.h" + +#if !defined(_WIN32) && !defined(WIN32) +namespace boost::context::detail { +struct transfer_t; +} +#endif + +namespace Common { + +/** + * Fiber class + * a fiber is a userspace thread with it's own context. They can be used to + * implement coroutines, emulated threading systems and certain asynchronous + * patterns. + * + * This class implements fibers at a low level, thus allowing greater freedom + * to implement such patterns. This fiber class is 'threadsafe' only one fiber + * can be running at a time and threads will be locked while trying to yield to + * a running fiber until it yields. WARNING exchanging two running fibers between + * threads will cause a deadlock. 
In order to prevent a deadlock, each thread should + * have an intermediary fiber, you switch to the intermediary fiber of the current + * thread and then from it switch to the expected fiber. This way you can exchange + * 2 fibers within 2 different threads. + */ +class Fiber { +public: +    Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter); +    ~Fiber(); + +    Fiber(const Fiber&) = delete; +    Fiber& operator=(const Fiber&) = delete; + +    Fiber(Fiber&&) = default; +    Fiber& operator=(Fiber&&) = default; + +    /// Yields control from Fiber 'from' to Fiber 'to' +    /// Fiber 'from' must be the currently running fiber. +    static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to); +    static std::shared_ptr<Fiber> ThreadToFiber(); + +    void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter); + +    void Rewind(); + +    /// Only call from main thread's fiber +    void Exit(); + +    /// Changes the start parameter of the fiber. 
Has no effect if the fiber already started +    void SetStartParameter(void* new_parameter) { +        start_parameter = new_parameter; +    } + +private: +    Fiber(); + +#if defined(_WIN32) || defined(WIN32) +    void OnRewind(); +    void Start(); +    static void FiberStartFunc(void* fiber_parameter); +    static void RewindStartFunc(void* fiber_parameter); +#else +    void OnRewind(boost::context::detail::transfer_t& transfer); +    void Start(boost::context::detail::transfer_t& transfer); +    static void FiberStartFunc(boost::context::detail::transfer_t transfer); +    static void RewindStartFunc(boost::context::detail::transfer_t transfer); +#endif + +    struct FiberImpl; + +    SpinLock guard{}; +    std::function<void(void*)> entry_point; +    std::function<void(void*)> rewind_point; +    void* rewind_parameter{}; +    void* start_parameter{}; +    std::shared_ptr<Fiber> previous_fiber; +    std::unique_ptr<FiberImpl> impl; +    bool is_thread_fiber{}; +    bool released{}; +}; + +} // namespace Common diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp index 3fdc309a2..8cff6ec37 100644 --- a/src/common/memory_detect.cpp +++ b/src/common/memory_detect.cpp @@ -9,10 +9,12 @@  // clang-format on  #else  #include <sys/types.h> -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__FreeBSD__)  #include <sys/sysctl.h> -#else +#elif defined(__linux__)  #include <sys/sysinfo.h> +#else +#include <unistd.h>  #endif  #endif @@ -38,15 +40,26 @@ static MemoryInfo Detect() {      // hw and vm are defined in sysctl.h      // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471      // sysctlbyname(const char *, void *, size_t *, void *, size_t); -    sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0); -    sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0); +    sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, nullptr, 0); +    sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, nullptr, 0);      
mem_info.TotalPhysicalMemory = ramsize;      mem_info.TotalSwapMemory = vmusage.xsu_total; -#else +#elif defined(__FreeBSD__) +    u_long physmem, swap_total; +    std::size_t sizeof_u_long = sizeof(u_long); +    // sysctlbyname(const char *, void *, size_t *, const void *, size_t); +    sysctlbyname("hw.physmem", &physmem, &sizeof_u_long, nullptr, 0); +    sysctlbyname("vm.swap_total", &swap_total, &sizeof_u_long, nullptr, 0); +    mem_info.TotalPhysicalMemory = physmem; +    mem_info.TotalSwapMemory = swap_total; +#elif defined(__linux__)      struct sysinfo meminfo;      sysinfo(&meminfo);      mem_info.TotalPhysicalMemory = meminfo.totalram;      mem_info.TotalSwapMemory = meminfo.totalswap; +#else +    mem_info.TotalPhysicalMemory = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGE_SIZE); +    mem_info.TotalSwapMemory = 0;  #endif      return mem_info; diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp new file mode 100644 index 000000000..c1524220f --- /dev/null +++ b/src/common/spin_lock.cpp @@ -0,0 +1,54 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/spin_lock.h" + +#if _MSC_VER +#include <intrin.h> +#if _M_AMD64 +#define __x86_64__ 1 +#endif +#if _M_ARM64 +#define __aarch64__ 1 +#endif +#else +#if __x86_64__ +#include <xmmintrin.h> +#endif +#endif + +namespace { + +void ThreadPause() { +#if __x86_64__ +    _mm_pause(); +#elif __aarch64__ && _MSC_VER +    __yield(); +#elif __aarch64__ +    asm("yield"); +#endif +} + +} // Anonymous namespace + +namespace Common { + +void SpinLock::lock() { +    while (lck.test_and_set(std::memory_order_acquire)) { +        ThreadPause(); +    } +} + +void SpinLock::unlock() { +    lck.clear(std::memory_order_release); +} + +bool SpinLock::try_lock() { +    if (lck.test_and_set(std::memory_order_acquire)) { +        return false; +    } +    return true; +} + +} // namespace Common diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h new file mode 100644 index 000000000..1df5528c4 --- /dev/null +++ b/src/common/spin_lock.h @@ -0,0 +1,26 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> + +namespace Common { + +/** + * SpinLock class + * a lock similar to mutex that forces a thread to spin wait instead calling the + * supervisor. Should be used on short sequences of code. 
+ */ +class SpinLock { +public: +    void lock(); +    void unlock(); +    bool try_lock(); + +private: +    std::atomic_flag lck = ATOMIC_FLAG_INIT; +}; + +} // namespace Common diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 200c6489a..16d42facd 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); +    fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);      fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 0cd2d10bf..8e5935e6a 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -25,6 +25,52 @@  namespace Common { +#ifdef _WIN32 + +void SetCurrentThreadPriority(ThreadPriority new_priority) { +    auto handle = GetCurrentThread(); +    int windows_priority = 0; +    switch (new_priority) { +    case ThreadPriority::Low: +        windows_priority = THREAD_PRIORITY_BELOW_NORMAL; +        break; +    case ThreadPriority::Normal: +        windows_priority = THREAD_PRIORITY_NORMAL; +        break; +    case ThreadPriority::High: +        windows_priority = THREAD_PRIORITY_ABOVE_NORMAL; +        break; +    case ThreadPriority::VeryHigh: +        windows_priority = THREAD_PRIORITY_HIGHEST; +        break; +    default: +        windows_priority = THREAD_PRIORITY_NORMAL; +        break; +    } +    SetThreadPriority(handle, windows_priority); +} + +#else + +void 
SetCurrentThreadPriority(ThreadPriority new_priority) { +    pthread_t this_thread = pthread_self(); + +    s32 max_prio = sched_get_priority_max(SCHED_OTHER); +    s32 min_prio = sched_get_priority_min(SCHED_OTHER); +    u32 level = static_cast<u32>(new_priority) + 1; + +    struct sched_param params; +    if (max_prio > min_prio) { +        params.sched_priority = min_prio + ((max_prio - min_prio) * level) / 4; +    } else { +        params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4; +    } + +    pthread_setschedparam(this_thread, SCHED_OTHER, ¶ms); +} + +#endif +  #ifdef _MSC_VER  // Sets the debugger-visible name of the current thread. @@ -70,6 +116,12 @@ void SetCurrentThreadName(const char* name) {  }  #endif +#if defined(_WIN32) +void SetCurrentThreadName(const char* name) { +    // Do Nothing on MingW +} +#endif +  #endif  } // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index 2fc071685..52b359413 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -9,6 +9,7 @@  #include <cstddef>  #include <mutex>  #include <thread> +#include "common/common_types.h"  namespace Common { @@ -28,8 +29,7 @@ public:          is_set = false;      } -    template <class Duration> -    bool WaitFor(const std::chrono::duration<Duration>& time) { +    bool WaitFor(const std::chrono::nanoseconds& time) {          std::unique_lock lk{mutex};          if (!condvar.wait_for(lk, time, [this] { return is_set; }))              return false; @@ -86,6 +86,15 @@ private:      std::size_t generation = 0; // Incremented once each time the barrier is used  }; +enum class ThreadPriority : u32 { +    Low = 0, +    Normal = 1, +    High = 2, +    VeryHigh = 3, +}; + +void SetCurrentThreadPriority(ThreadPriority new_priority); +  void SetCurrentThreadName(const char* name);  } // namespace Common diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp index 32bf56730..16bf7c828 100644 --- a/src/common/uint128.cpp +++ 
b/src/common/uint128.cpp @@ -6,12 +6,38 @@  #include <intrin.h>  #pragma intrinsic(_umul128) +#pragma intrinsic(_udiv128)  #endif  #include <cstring>  #include "common/uint128.h"  namespace Common { +#ifdef _MSC_VER + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { +    u128 r{}; +    r[0] = _umul128(a, b, &r[1]); +    u64 remainder; +#if _MSC_VER < 1923 +    return udiv128(r[1], r[0], d, &remainder); +#else +    return _udiv128(r[1], r[0], d, &remainder); +#endif +} + +#else + +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) { +    const u64 diva = a / d; +    const u64 moda = a % d; +    const u64 divb = b / d; +    const u64 modb = b % d; +    return diva * b + moda * divb + moda * modb / d; +} + +#endif +  u128 Multiply64Into128(u64 a, u64 b) {      u128 result;  #ifdef _MSC_VER diff --git a/src/common/uint128.h b/src/common/uint128.h index a3be2a2cb..503cd2d0c 100644 --- a/src/common/uint128.h +++ b/src/common/uint128.h @@ -9,6 +9,9 @@  namespace Common { +// This function multiplies 2 u64 values and divides it by a u64 value. +u64 MultiplyAndDivide64(u64 a, u64 b, u64 d); +  // This function multiplies 2 u64 values and produces a u128 value;  u128 Multiply64Into128(u64 a, u64 b); diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp new file mode 100644 index 000000000..3afbdb898 --- /dev/null +++ b/src/common/wall_clock.cpp @@ -0,0 +1,91 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/uint128.h" +#include "common/wall_clock.h" + +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/cpu_detect.h" +#include "common/x64/native_clock.h" +#endif + +namespace Common { + +using base_timer = std::chrono::steady_clock; +using base_time_point = std::chrono::time_point<base_timer>; + +class StandardWallClock : public WallClock { +public: +    StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency) +        : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) { +        start_time = base_timer::now(); +    } + +    std::chrono::nanoseconds GetTimeNS() override { +        base_time_point current = base_timer::now(); +        auto elapsed = current - start_time; +        return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed); +    } + +    std::chrono::microseconds GetTimeUS() override { +        base_time_point current = base_timer::now(); +        auto elapsed = current - start_time; +        return std::chrono::duration_cast<std::chrono::microseconds>(elapsed); +    } + +    std::chrono::milliseconds GetTimeMS() override { +        base_time_point current = base_timer::now(); +        auto elapsed = current - start_time; +        return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed); +    } + +    u64 GetClockCycles() override { +        std::chrono::nanoseconds time_now = GetTimeNS(); +        const u128 temporary = +            Common::Multiply64Into128(time_now.count(), emulated_clock_frequency); +        return Common::Divide128On32(temporary, 1000000000).first; +    } + +    u64 GetCPUCycles() override { +        std::chrono::nanoseconds time_now = GetTimeNS(); +        const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency); +        return Common::Divide128On32(temporary, 1000000000).first; +    } + +    void Pause(bool is_paused) override { +        // Do nothing in this clock type. 
+    } + +private: +    base_time_point start_time; +}; + +#ifdef ARCHITECTURE_x86_64 + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, +                                                   u32 emulated_clock_frequency) { +    const auto& caps = GetCPUCaps(); +    u64 rtsc_frequency = 0; +    if (caps.invariant_tsc) { +        rtsc_frequency = EstimateRDTSCFrequency(); +    } +    if (rtsc_frequency == 0) { +        return std::make_unique<StandardWallClock>(emulated_cpu_frequency, +                                                   emulated_clock_frequency); +    } else { +        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, +                                                  rtsc_frequency); +    } +} + +#else + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, +                                                   u32 emulated_clock_frequency) { +    return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); +} + +#endif + +} // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h new file mode 100644 index 000000000..367d72134 --- /dev/null +++ b/src/common/wall_clock.h @@ -0,0 +1,53 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <chrono> +#include <memory> + +#include "common/common_types.h" + +namespace Common { + +class WallClock { +public: +    /// Returns current wall time in nanoseconds +    virtual std::chrono::nanoseconds GetTimeNS() = 0; + +    /// Returns current wall time in microseconds +    virtual std::chrono::microseconds GetTimeUS() = 0; + +    /// Returns current wall time in milliseconds +    virtual std::chrono::milliseconds GetTimeMS() = 0; + +    /// Returns current wall time in emulated clock cycles +    virtual u64 GetClockCycles() = 0; + +    /// Returns current wall time in emulated cpu cycles +    virtual u64 GetCPUCycles() = 0; + +    virtual void Pause(bool is_paused) = 0; + +    /// Tells if the wall clock, uses the host CPU's hardware clock +    bool IsNative() const { +        return is_native; +    } + +protected: +    WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native) +        : emulated_cpu_frequency{emulated_cpu_frequency}, +          emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {} + +    u64 emulated_cpu_frequency; +    u64 emulated_clock_frequency; + +private: +    bool is_native; +}; + +std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, +                                                   u32 emulated_clock_frequency); + +} // namespace Common diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..fccd2eee5 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -62,6 +62,17 @@ static CPUCaps Detect() {      std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));      std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));      std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); +    if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) +        caps.manufacturer = Manufacturer::Intel; +    else if (cpu_id[1] == 0x68747541 && 
cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) +        caps.manufacturer = Manufacturer::AMD; +    else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) +        caps.manufacturer = Manufacturer::Hygon; +    else +        caps.manufacturer = Manufacturer::Unknown; + +    u32 family = {}; +    u32 model = {};      __cpuid(cpu_id, 0x80000000); @@ -73,6 +84,14 @@ static CPUCaps Detect() {      // Detect family and other miscellaneous features      if (max_std_fn >= 1) {          __cpuid(cpu_id, 0x00000001); +        family = (cpu_id[0] >> 8) & 0xf; +        model = (cpu_id[0] >> 4) & 0xf; +        if (family == 0xf) { +            family += (cpu_id[0] >> 20) & 0xff; +        } +        if (family >= 6) { +            model += ((cpu_id[0] >> 16) & 0xf) << 4; +        }          if ((cpu_id[3] >> 25) & 1)              caps.sse = true; @@ -110,6 +129,11 @@ static CPUCaps Detect() {                  caps.bmi1 = true;              if ((cpu_id[1] >> 8) & 1)                  caps.bmi2 = true; +            // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) +            if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && +                (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { +                caps.avx512 = caps.avx2; +            }          }      } @@ -130,6 +154,20 @@ static CPUCaps Detect() {              caps.fma4 = true;      } +    if (max_ex_fn >= 0x80000007) { +        __cpuid(cpu_id, 0x80000007); +        if (cpu_id[3] & (1 << 8)) { +            caps.invariant_tsc = true; +        } +    } + +    if (max_std_fn >= 0x16) { +        __cpuid(cpu_id, 0x16); +        caps.base_frequency = cpu_id[0]; +        caps.max_frequency = cpu_id[1]; +        caps.bus_frequency = cpu_id[2]; +    } +      return caps;  } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..e3b63302e 100644 --- a/src/common/x64/cpu_detect.h +++ 
b/src/common/x64/cpu_detect.h @@ -6,8 +6,16 @@  namespace Common { +enum class Manufacturer : u32 { +    Intel = 0, +    AMD = 1, +    Hygon = 2, +    Unknown = 3, +}; +  /// x86/x64 CPU capabilities that may be detected by this module  struct CPUCaps { +    Manufacturer manufacturer;      char cpu_string[0x21];      char brand_string[0x41];      bool sse; @@ -19,11 +27,16 @@ struct CPUCaps {      bool lzcnt;      bool avx;      bool avx2; +    bool avx512;      bool bmi1;      bool bmi2;      bool fma;      bool fma4;      bool aes; +    bool invariant_tsc; +    u32 base_frequency; +    u32 max_frequency; +    u32 bus_frequency;  };  /** diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp new file mode 100644 index 000000000..424b39b1f --- /dev/null +++ b/src/common/x64/native_clock.cpp @@ -0,0 +1,103 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <mutex> +#include <thread> + +#ifdef _MSC_VER +#include <intrin.h> +#else +#include <x86intrin.h> +#endif + +#include "common/uint128.h" +#include "common/x64/native_clock.h" + +namespace Common { + +u64 EstimateRDTSCFrequency() { +    const auto milli_10 = std::chrono::milliseconds{10}; +    // get current time +    _mm_mfence(); +    const u64 tscStart = __rdtsc(); +    const auto startTime = std::chrono::high_resolution_clock::now(); +    // wait roughly 3 seconds +    while (true) { +        auto milli = std::chrono::duration_cast<std::chrono::milliseconds>( +            std::chrono::high_resolution_clock::now() - startTime); +        if (milli.count() >= 3000) +            break; +        std::this_thread::sleep_for(milli_10); +    } +    const auto endTime = std::chrono::high_resolution_clock::now(); +    _mm_mfence(); +    const u64 tscEnd = __rdtsc(); +    // calculate difference +    const u64 timer_diff = +        
std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count(); +    const u64 tsc_diff = tscEnd - tscStart; +    const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); +    return tsc_freq; +} + +namespace X64 { +NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, +                         u64 rtsc_frequency) +    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{ +                                                                             rtsc_frequency} { +    _mm_mfence(); +    last_measure = __rdtsc(); +    accumulated_ticks = 0U; +} + +u64 NativeClock::GetRTSC() { +    std::scoped_lock scope{rtsc_serialize}; +    _mm_mfence(); +    const u64 current_measure = __rdtsc(); +    u64 diff = current_measure - last_measure; +    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) +    if (current_measure > last_measure) { +        last_measure = current_measure; +    } +    accumulated_ticks += diff; +    /// The clock cannot be more precise than the guest timer, remove the lower bits +    return accumulated_ticks & inaccuracy_mask; +} + +void NativeClock::Pause(bool is_paused) { +    if (!is_paused) { +        _mm_mfence(); +        last_measure = __rdtsc(); +    } +} + +std::chrono::nanoseconds NativeClock::GetTimeNS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; +} + +std::chrono::microseconds NativeClock::GetTimeUS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; +} + +std::chrono::milliseconds NativeClock::GetTimeMS() { +    const u64 rtsc_value = GetRTSC(); +    return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; +} + +u64 NativeClock::GetClockCycles() { +    const u64 rtsc_value = GetRTSC(); +    return 
MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); +} + +u64 NativeClock::GetCPUCycles() { +    const u64 rtsc_value = GetRTSC(); +    return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); +} + +} // namespace X64 + +} // namespace Common diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h new file mode 100644 index 000000000..891a3bbfd --- /dev/null +++ b/src/common/x64/native_clock.h @@ -0,0 +1,48 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> + +#include "common/spin_lock.h" +#include "common/wall_clock.h" + +namespace Common { + +namespace X64 { +class NativeClock : public WallClock { +public: +    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency); + +    std::chrono::nanoseconds GetTimeNS() override; + +    std::chrono::microseconds GetTimeUS() override; + +    std::chrono::milliseconds GetTimeMS() override; + +    u64 GetClockCycles() override; + +    u64 GetCPUCycles() override; + +    void Pause(bool is_paused) override; + +private: +    u64 GetRTSC(); + +    /// value used to reduce the native clocks accuracy as some apss rely on +    /// undefined behavior where the level of accuracy in the clock shouldn't +    /// be higher. 
+    static constexpr u64 inaccuracy_mask = ~(0x400 - 1); + +    SpinLock rtsc_serialize{}; +    u64 last_measure{}; +    u64 accumulated_ticks{}; +    u64 rtsc_frequency; +}; +} // namespace X64 + +u64 EstimateRDTSCFrequency(); + +} // namespace Common diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index cb9ced5c9..d1f173f42 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -7,6 +7,16 @@ endif()  add_library(core STATIC      arm/arm_interface.h      arm/arm_interface.cpp +    arm/cpu_interrupt_handler.cpp +    arm/cpu_interrupt_handler.h +    arm/dynarmic/arm_dynarmic_32.cpp +    arm/dynarmic/arm_dynarmic_32.h +    arm/dynarmic/arm_dynarmic_64.cpp +    arm/dynarmic/arm_dynarmic_64.h +    arm/dynarmic/arm_dynarmic_cp15.cpp +    arm/dynarmic/arm_dynarmic_cp15.h +    arm/dynarmic/arm_exclusive_monitor.cpp +    arm/dynarmic/arm_exclusive_monitor.h      arm/exclusive_monitor.cpp      arm/exclusive_monitor.h      arm/unicorn/arm_unicorn.cpp @@ -15,8 +25,6 @@ add_library(core STATIC      constants.h      core.cpp      core.h -    core_manager.cpp -    core_manager.h      core_timing.cpp      core_timing.h      core_timing_util.cpp @@ -606,7 +614,7 @@ endif()  create_target_directory_groups(core)  target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) -target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip) +target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls opus unicorn zip)  if (YUZU_ENABLE_BOXCAT)      target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT) diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index d079a1bc8..d2295ed90 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -139,6 +139,63 @@ std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_addr  constexpr u64 SEGMENT_BASE = 0x7100000000ull; 
+std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext( +    System& system, const ThreadContext64& ctx) { +    std::vector<BacktraceEntry> out; +    auto& memory = system.Memory(); + +    auto fp = ctx.cpu_registers[29]; +    auto lr = ctx.cpu_registers[30]; +    while (true) { +        out.push_back({"", 0, lr, 0}); +        if (!fp) { +            break; +        } +        lr = memory.Read64(fp + 8) - 4; +        fp = memory.Read64(fp); +    } + +    std::map<VAddr, std::string> modules; +    auto& loader{system.GetAppLoader()}; +    if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) { +        return {}; +    } + +    std::map<std::string, Symbols> symbols; +    for (const auto& module : modules) { +        symbols.insert_or_assign(module.second, GetSymbols(module.first, memory)); +    } + +    for (auto& entry : out) { +        VAddr base = 0; +        for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) { +            const auto& module{*iter}; +            if (entry.original_address >= module.first) { +                entry.module = module.second; +                base = module.first; +                break; +            } +        } + +        entry.offset = entry.original_address - base; +        entry.address = SEGMENT_BASE + entry.offset; + +        if (entry.module.empty()) +            entry.module = "unknown"; + +        const auto symbol_set = symbols.find(entry.module); +        if (symbol_set != symbols.end()) { +            const auto symbol = GetSymbolName(symbol_set->second, entry.offset); +            if (symbol.has_value()) { +                // TODO(DarkLordZach): Add demangling of symbol names. 
+                entry.name = *symbol; +            } +        } +    } + +    return out; +} +  std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {      std::vector<BacktraceEntry> out;      auto& memory = system.Memory(); diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index cb2e640e2..1f24051e4 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -7,6 +7,7 @@  #include <array>  #include <vector>  #include "common/common_types.h" +#include "core/hardware_properties.h"  namespace Common {  struct PageTable; @@ -18,25 +19,29 @@ enum class VMAPermission : u8;  namespace Core {  class System; +class CPUInterruptHandler; + +using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;  /// Generic ARMv8 CPU interface  class ARM_Interface : NonCopyable {  public: -    explicit ARM_Interface(System& system_) : system{system_} {} +    explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock) +        : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{ +                                                                       uses_wall_clock} {}      virtual ~ARM_Interface() = default;      struct ThreadContext32 {          std::array<u32, 16> cpu_registers{}; +        std::array<u32, 64> extension_registers{};          u32 cpsr{}; -        std::array<u8, 4> padding{}; -        std::array<u64, 32> fprs{};          u32 fpscr{};          u32 fpexc{};          u32 tpidr{};      };      // Internally within the kernel, it expects the AArch32 version of the      // thread context to be 344 bytes in size. 
-    static_assert(sizeof(ThreadContext32) == 0x158); +    static_assert(sizeof(ThreadContext32) == 0x150);      struct ThreadContext64 {          std::array<u64, 31> cpu_registers{}; @@ -143,6 +148,8 @@ public:       */      virtual void SetTPIDR_EL0(u64 value) = 0; +    virtual void ChangeProcessorID(std::size_t new_core_id) = 0; +      virtual void SaveContext(ThreadContext32& ctx) = 0;      virtual void SaveContext(ThreadContext64& ctx) = 0;      virtual void LoadContext(const ThreadContext32& ctx) = 0; @@ -162,6 +169,9 @@ public:          std::string name;      }; +    static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system, +                                                               const ThreadContext64& ctx); +      std::vector<BacktraceEntry> GetBacktrace() const;      /// fp (= r29) points to the last frame record. @@ -175,6 +185,8 @@ public:  protected:      /// System context that this ARM interface is running under.      System& system; +    CPUInterrupts& interrupt_handlers; +    bool uses_wall_clock;  };  } // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp new file mode 100644 index 000000000..2f1a1a269 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.cpp @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/thread.h" +#include "core/arm/cpu_interrupt_handler.h" + +namespace Core { + +CPUInterruptHandler::CPUInterruptHandler() : is_interrupted{} { +    interrupt_event = std::make_unique<Common::Event>(); +} + +CPUInterruptHandler::~CPUInterruptHandler() = default; + +void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) { +    if (is_interrupted_) { +        interrupt_event->Set(); +    } +    this->is_interrupted = is_interrupted_; +} + +void CPUInterruptHandler::AwaitInterrupt() { +    interrupt_event->Wait(); +} + +} // namespace Core diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h new file mode 100644 index 000000000..3d062d326 --- /dev/null +++ b/src/core/arm/cpu_interrupt_handler.h @@ -0,0 +1,39 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +namespace Common { +class Event; +} + +namespace Core { + +class CPUInterruptHandler { +public: +    CPUInterruptHandler(); +    ~CPUInterruptHandler(); + +    CPUInterruptHandler(const CPUInterruptHandler&) = delete; +    CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete; + +    CPUInterruptHandler(CPUInterruptHandler&&) = default; +    CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default; + +    bool IsInterrupted() const { +        return is_interrupted; +    } + +    void SetInterrupt(bool is_interrupted); + +    void AwaitInterrupt(); + +private: +    bool is_interrupted{}; +    std::unique_ptr<Common::Event> interrupt_event; +}; + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 19d798dc7..0d4ab95b7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -7,15 +7,17 @@  #include <dynarmic/A32/a32.h>  #include <dynarmic/A32/config.h>  #include 
<dynarmic/A32/context.h> -#include "common/microprofile.h" +#include "common/logging/log.h" +#include "common/page_table.h" +#include "core/arm/cpu_interrupt_handler.h"  #include "core/arm/dynarmic/arm_dynarmic_32.h" -#include "core/arm/dynarmic/arm_dynarmic_64.h"  #include "core/arm/dynarmic/arm_dynarmic_cp15.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/core_timing.h"  #include "core/hle/kernel/svc.h"  #include "core/memory.h" +#include "core/settings.h"  namespace Core { @@ -49,6 +51,19 @@ public:          parent.system.Memory().Write64(vaddr, value);      } +    bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override { +        return parent.system.Memory().WriteExclusive8(vaddr, value, expected); +    } +    bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override { +        return parent.system.Memory().WriteExclusive16(vaddr, value, expected); +    } +    bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override { +        return parent.system.Memory().WriteExclusive32(vaddr, value, expected); +    } +    bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override { +        return parent.system.Memory().WriteExclusive64(vaddr, value, expected); +    } +      void InterpreterFallback(u32 pc, std::size_t num_instructions) override {          UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,                            MemoryReadCode(pc)); @@ -62,7 +77,7 @@ public:          case Dynarmic::A32::Exception::Breakpoint:              break;          } -        LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", +        LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",                       static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));          UNIMPLEMENTED();      } @@ -72,24 +87,36 @@ public:      }      void 
AddTicks(u64 ticks) override { +        if (parent.uses_wall_clock) { +            return; +        }          // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a          // rough approximation of the amount of executed ticks in the system, it may be thrown off          // if not all cores are doing a similar amount of work. Instead of doing this, we should          // device a way so that timing is consistent across all cores without increasing the ticks 4          // times. -        u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; +        u64 amortized_ticks = +            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;          // Always execute at least one tick.          amortized_ticks = std::max<u64>(amortized_ticks, 1);          parent.system.CoreTiming().AddTicks(amortized_ticks);          num_interpreted_instructions = 0;      } +      u64 GetTicksRemaining() override { -        return std::max(parent.system.CoreTiming().GetDowncount(), {}); +        if (parent.uses_wall_clock) { +            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { +                return minimum_run_cycles; +            } +            return 0U; +        } +        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);      }      ARM_Dynarmic_32& parent;      std::size_t num_interpreted_instructions{}; +    static constexpr u64 minimum_run_cycles = 1000U;  };  std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, @@ -100,13 +127,31 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&      // config.page_table = &page_table.pointers;      config.coprocessors[15] = cp15;      config.define_unpredictable_behaviour = true; +    static constexpr std::size_t PAGE_BITS = 12; +    static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS); +    config.page_table = 
reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( +        page_table.pointers.data()); +    config.absolute_offset_page_table = true; +    config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; +    config.only_detect_misalignment_via_page_table_on_page_boundary = true; + +    // Multi-process state +    config.processor_id = core_index; +    config.global_monitor = &exclusive_monitor.monitor; + +    // Timing +    config.wall_clock_cntpct = uses_wall_clock; + +    // Optimizations +    if (Settings::values.disable_cpu_opt) { +        config.enable_optimizations = false; +        config.enable_fast_dispatch = false; +    } +      return std::make_unique<Dynarmic::A32::Jit>(config);  } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); -  void ARM_Dynarmic_32::Run() { -    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);      jit->Run();  } @@ -114,9 +159,11 @@ void ARM_Dynarmic_32::Step() {      jit->Step();  } -ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, +ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, +                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,                                   std::size_t core_index) -    : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)), +    : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, +      cb(std::make_unique<DynarmicCallbacks32>(*this)),        cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},        exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} @@ -168,17 +215,25 @@ void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {      cp15->uprw = static_cast<u32>(value);  } +void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) { +    jit->ChangeProcessorID(new_core_id); +} +  void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {      Dynarmic::A32::Context 
context;      jit->SaveContext(context);      ctx.cpu_registers = context.Regs(); +    ctx.extension_registers = context.ExtRegs();      ctx.cpsr = context.Cpsr(); +    ctx.fpscr = context.Fpscr();  }  void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {      Dynarmic::A32::Context context;      context.Regs() = ctx.cpu_registers; +    context.ExtRegs() = ctx.extension_registers;      context.SetCpsr(ctx.cpsr); +    context.SetFpscr(ctx.fpscr);      jit->LoadContext(context);  } @@ -187,10 +242,15 @@ void ARM_Dynarmic_32::PrepareReschedule() {  }  void ARM_Dynarmic_32::ClearInstructionCache() { +    if (!jit) { +        return; +    }      jit->ClearCache();  } -void ARM_Dynarmic_32::ClearExclusiveState() {} +void ARM_Dynarmic_32::ClearExclusiveState() { +    jit->ClearExclusiveState(); +}  void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,                                         std::size_t new_address_space_size_in_bits) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index e5b92d7bb..2bab31b92 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -9,7 +9,7 @@  #include <dynarmic/A32/a32.h>  #include <dynarmic/A64/a64.h> -#include <dynarmic/A64/exclusive_monitor.h> +#include <dynarmic/exclusive_monitor.h>  #include "common/common_types.h"  #include "common/hash.h"  #include "core/arm/arm_interface.h" @@ -21,6 +21,7 @@ class Memory;  namespace Core { +class CPUInterruptHandler;  class DynarmicCallbacks32;  class DynarmicCP15;  class DynarmicExclusiveMonitor; @@ -28,7 +29,8 @@ class System;  class ARM_Dynarmic_32 final : public ARM_Interface {  public: -    ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); +    ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, +                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);      
~ARM_Dynarmic_32() override;      void SetPC(u64 pc) override; @@ -45,6 +47,7 @@ public:      void SetTlsAddress(VAddr address) override;      void SetTPIDR_EL0(u64 value) override;      u64 GetTPIDR_EL0() const override; +    void ChangeProcessorID(std::size_t new_core_id) override;      void SaveContext(ThreadContext32& ctx) override;      void SaveContext(ThreadContext64& ctx) override {} diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index 337b97be9..790981034 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -7,11 +7,11 @@  #include <dynarmic/A64/a64.h>  #include <dynarmic/A64/config.h>  #include "common/logging/log.h" -#include "common/microprofile.h"  #include "common/page_table.h" +#include "core/arm/cpu_interrupt_handler.h"  #include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h"  #include "core/gdbstub/gdbstub.h" @@ -65,6 +65,22 @@ public:          memory.Write64(vaddr + 8, value[1]);      } +    bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override { +        return parent.system.Memory().WriteExclusive8(vaddr, value, expected); +    } +    bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override { +        return parent.system.Memory().WriteExclusive16(vaddr, value, expected); +    } +    bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override { +        return parent.system.Memory().WriteExclusive32(vaddr, value, expected); +    } +    bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override { +        return parent.system.Memory().WriteExclusive64(vaddr, value, expected); +    } +    bool MemoryWriteExclusive128(u64 vaddr, Vector value, 
Vector expected) override { +        return parent.system.Memory().WriteExclusive128(vaddr, value, expected); +    } +      void InterpreterFallback(u64 pc, std::size_t num_instructions) override {          LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,                   num_instructions, MemoryReadCode(pc)); @@ -98,8 +114,8 @@ public:              }              [[fallthrough]];          default: -            ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})", -                       static_cast<std::size_t>(exception), pc); +            ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", +                       static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));          }      } @@ -108,29 +124,42 @@ public:      }      void AddTicks(u64 ticks) override { +        if (parent.uses_wall_clock) { +            return; +        }          // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a          // rough approximation of the amount of executed ticks in the system, it may be thrown off          // if not all cores are doing a similar amount of work. Instead of doing this, we should          // device a way so that timing is consistent across all cores without increasing the ticks 4          // times. -        u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; +        u64 amortized_ticks = +            (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;          // Always execute at least one tick.          
amortized_ticks = std::max<u64>(amortized_ticks, 1);          parent.system.CoreTiming().AddTicks(amortized_ticks);          num_interpreted_instructions = 0;      } +      u64 GetTicksRemaining() override { -        return std::max(parent.system.CoreTiming().GetDowncount(), s64{0}); +        if (parent.uses_wall_clock) { +            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) { +                return minimum_run_cycles; +            } +            return 0U; +        } +        return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);      } +      u64 GetCNTPCT() override { -        return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); +        return parent.system.CoreTiming().GetClockTicks();      }      ARM_Dynarmic_64& parent;      std::size_t num_interpreted_instructions = 0;      u64 tpidrro_el0 = 0;      u64 tpidr_el0 = 0; +    static constexpr u64 minimum_run_cycles = 1000U;  };  std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table, @@ -168,14 +197,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&          config.enable_fast_dispatch = false;      } +    // Timing +    config.wall_clock_cntpct = uses_wall_clock; +      return std::make_shared<Dynarmic::A64::Jit>(config);  } -MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); -  void ARM_Dynarmic_64::Run() { -    MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64); -      jit->Run();  } @@ -183,11 +211,16 @@ void ARM_Dynarmic_64::Step() {      cb->InterpreterFallback(jit->GetPC(), 1);  } -ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, +ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, +                                 bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,                                   std::size_t core_index) -    : ARM_Interface{system}, 
cb(std::make_unique<DynarmicCallbacks64>(*this)), -      inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index}, -      exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} +    : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, +      cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers, +                                                                      uses_wall_clock, +                                                                      ARM_Unicorn::Arch::AArch64, +                                                                      core_index}, +      core_index{core_index}, exclusive_monitor{ +                                  dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}  ARM_Dynarmic_64::~ARM_Dynarmic_64() = default; @@ -239,6 +272,10 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {      cb->tpidr_el0 = value;  } +void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) { +    jit->ChangeProcessorID(new_core_id); +} +  void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {      ctx.cpu_registers = jit->GetRegisters();      ctx.sp = jit->GetSP(); @@ -266,6 +303,9 @@ void ARM_Dynarmic_64::PrepareReschedule() {  }  void ARM_Dynarmic_64::ClearInstructionCache() { +    if (!jit) { +        return; +    }      jit->ClearCache();  } @@ -285,44 +325,4 @@ void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,      jit_cache.emplace(key, jit);  } -DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) -    : monitor(core_count), memory{memory} {} - -DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; - -void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) { -    // Size doesn't actually matter. 
-    monitor.Mark(core_index, addr, 16); -} - -void DynarmicExclusiveMonitor::ClearExclusive() { -    monitor.Clear(); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { -    return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) { -    return monitor.DoExclusiveOperation(core_index, vaddr, 2, -                                        [&] { memory.Write16(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) { -    return monitor.DoExclusiveOperation(core_index, vaddr, 4, -                                        [&] { memory.Write32(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) { -    return monitor.DoExclusiveOperation(core_index, vaddr, 8, -                                        [&] { memory.Write64(vaddr, value); }); -} - -bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { -    return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] { -        memory.Write64(vaddr + 0, value[0]); -        memory.Write64(vaddr + 8, value[1]); -    }); -} -  } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 647cecaf0..403c55961 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -8,7 +8,6 @@  #include <unordered_map>  #include <dynarmic/A64/a64.h> -#include <dynarmic/A64/exclusive_monitor.h>  #include "common/common_types.h"  #include "common/hash.h"  #include "core/arm/arm_interface.h" @@ -22,12 +21,14 @@ class Memory;  namespace Core {  class DynarmicCallbacks64; +class CPUInterruptHandler;  class DynarmicExclusiveMonitor;  class System;  class ARM_Dynarmic_64 final : public 
ARM_Interface {  public: -    ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); +    ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, +                    ExclusiveMonitor& exclusive_monitor, std::size_t core_index);      ~ARM_Dynarmic_64() override;      void SetPC(u64 pc) override; @@ -44,6 +45,7 @@ public:      void SetTlsAddress(VAddr address) override;      void SetTPIDR_EL0(u64 value) override;      u64 GetTPIDR_EL0() const override; +    void ChangeProcessorID(std::size_t new_core_id) override;      void SaveContext(ThreadContext32& ctx) override {}      void SaveContext(ThreadContext64& ctx) override; @@ -75,24 +77,4 @@ private:      DynarmicExclusiveMonitor& exclusive_monitor;  }; -class DynarmicExclusiveMonitor final : public ExclusiveMonitor { -public: -    explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); -    ~DynarmicExclusiveMonitor() override; - -    void SetExclusive(std::size_t core_index, VAddr addr) override; -    void ClearExclusive() override; - -    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; -    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; -    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override; -    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override; -    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; - -private: -    friend class ARM_Dynarmic_64; -    Dynarmic::A64::ExclusiveMonitor monitor; -    Core::Memory::Memory& memory; -}; -  } // namespace Core diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp index d43e4dd70..54556e0f9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp @@ -97,7 +97,7 @@ CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool 
two, unsigned opc          const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(              [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {                  ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg; -                return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); +                return parent.system.CoreTiming().GetClockTicks();              });          return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};      } diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp new file mode 100644 index 000000000..4e209f6a5 --- /dev/null +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp @@ -0,0 +1,76 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cinttypes> +#include <memory> +#include "core/arm/dynarmic/arm_exclusive_monitor.h" +#include "core/memory.h" + +namespace Core { + +DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count) +    : monitor(core_count), memory{memory} {} + +DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; + +u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) { +    return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); }); +} + +u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) { +    return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); }); +} + +u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) { +    return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); }); +} + +u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) { +    return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); 
}); +} + +u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) { +    return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 { +        u128 result; +        result[0] = memory.Read64(addr); +        result[1] = memory.Read64(addr + 8); +        return result; +    }); +} + +void DynarmicExclusiveMonitor::ClearExclusive() { +    monitor.Clear(); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) { +    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool { +        return memory.WriteExclusive8(vaddr, value, expected); +    }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) { +    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool { +        return memory.WriteExclusive16(vaddr, value, expected); +    }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) { +    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool { +        return memory.WriteExclusive32(vaddr, value, expected); +    }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) { +    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool { +        return memory.WriteExclusive64(vaddr, value, expected); +    }); +} + +bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) { +    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool { +        return memory.WriteExclusive128(vaddr, value, expected); +    }); +} + +} // namespace Core diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h new file mode 100644 index 000000000..964f4a55d --- /dev/null +++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h @@ -0,0 
+1,48 @@ +// Copyright 2020 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> + +#include <dynarmic/exclusive_monitor.h> + +#include "common/common_types.h" +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/exclusive_monitor.h" + +namespace Core::Memory { +class Memory; +} + +namespace Core { + +class DynarmicExclusiveMonitor final : public ExclusiveMonitor { +public: +    explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count); +    ~DynarmicExclusiveMonitor() override; + +    u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override; +    u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override; +    u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override; +    u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override; +    u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override; +    void ClearExclusive() override; + +    bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override; +    bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override; +    bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override; +    bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override; +    bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; + +private: +    friend class ARM_Dynarmic_32; +    friend class ARM_Dynarmic_64; +    Dynarmic::ExclusiveMonitor monitor; +    Core::Memory::Memory& memory; +}; + +} // namespace Core diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp index b32401e0b..d8cba369d 100644 --- a/src/core/arm/exclusive_monitor.cpp +++ b/src/core/arm/exclusive_monitor.cpp @@ -3,7 +3,7 @@  // Refer to the license.txt file included.  
#ifdef ARCHITECTURE_x86_64 -#include "core/arm/dynarmic/arm_dynarmic_64.h" +#include "core/arm/dynarmic/arm_exclusive_monitor.h"  #endif  #include "core/arm/exclusive_monitor.h"  #include "core/memory.h" diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h index ccd73b80f..62f6e6023 100644 --- a/src/core/arm/exclusive_monitor.h +++ b/src/core/arm/exclusive_monitor.h @@ -18,7 +18,11 @@ class ExclusiveMonitor {  public:      virtual ~ExclusiveMonitor(); -    virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0; +    virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0; +    virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0; +    virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0; +    virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0; +    virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;      virtual void ClearExclusive() = 0;      virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0; diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp index e40e9626a..1df3f3ed1 100644 --- a/src/core/arm/unicorn/arm_unicorn.cpp +++ b/src/core/arm/unicorn/arm_unicorn.cpp @@ -6,6 +6,7 @@  #include <unicorn/arm64.h>  #include "common/assert.h"  #include "common/microprofile.h" +#include "core/arm/cpu_interrupt_handler.h"  #include "core/arm/unicorn/arm_unicorn.h"  #include "core/core.h"  #include "core/core_timing.h" @@ -62,7 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si      return false;  } -ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} { +ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, +                         Arch architecture, std::size_t core_index) +    : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {      const auto arch 
= architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;      CHECKED(uc_open(arch, UC_MODE_ARM, &uc)); @@ -156,12 +159,20 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) {      CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value));  } +void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) { +    core_index = new_core_id; +} +  void ARM_Unicorn::Run() {      if (GDBStub::IsServerEnabled()) {          ExecuteInstructions(std::max(4000000U, 0U));      } else { -        ExecuteInstructions( -            std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0})); +        while (true) { +            if (interrupt_handlers[core_index].IsInterrupted()) { +                return; +            } +            ExecuteInstructions(10); +        }      }  } @@ -183,8 +194,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {                             UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));      CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));      CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size())); - -    system.CoreTiming().AddTicks(num_instructions);      if (GDBStub::IsServerEnabled()) {          if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {              uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address); diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 725c65085..810aff311 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h @@ -20,7 +20,8 @@ public:          AArch64, // 64-bit ARM      }; -    explicit ARM_Unicorn(System& system, Arch architecture); +    explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock, +                         Arch architecture, std::size_t core_index);      ~ARM_Unicorn() override;      void SetPC(u64 pc) override; @@ -35,6 +36,7 @@ public:      void SetTlsAddress(VAddr address) override;      void 
SetTPIDR_EL0(u64 value) override;      u64 GetTPIDR_EL0() const override; +    void ChangeProcessorID(std::size_t new_core_id) override;      void PrepareReschedule() override;      void ClearExclusiveState() override;      void ExecuteInstructions(std::size_t num_instructions); @@ -55,6 +57,7 @@ private:      uc_engine* uc{};      GDBStub::BreakpointAddress last_bkpt{};      bool last_bkpt_hit = false; +    std::size_t core_index;  };  } // namespace Core diff --git a/src/core/core.cpp b/src/core/core.cpp index f9f8a3000..1a243c515 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -8,10 +8,10 @@  #include "common/file_util.h"  #include "common/logging/log.h" +#include "common/microprofile.h"  #include "common/string_util.h"  #include "core/arm/exclusive_monitor.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/core_timing.h"  #include "core/cpu_manager.h"  #include "core/device_memory.h" @@ -51,6 +51,11 @@  #include "video_core/renderer_base.h"  #include "video_core/video_core.h" +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU0, "ARM JIT", "Dynarmic CPU 0", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU1, "ARM JIT", "Dynarmic CPU 1", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU2, "ARM JIT", "Dynarmic CPU 2", MP_RGB(255, 64, 64)); +MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU3, "ARM JIT", "Dynarmic CPU 3", MP_RGB(255, 64, 64)); +  namespace Core {  namespace { @@ -117,23 +122,22 @@ struct System::Impl {          : kernel{system}, fs_controller{system}, memory{system},            cpu_manager{system}, reporter{system}, applet_manager{system} {} -    CoreManager& CurrentCoreManager() { -        return cpu_manager.GetCurrentCoreManager(); -    } +    ResultStatus Run() { +        status = ResultStatus::Success; -    Kernel::PhysicalCore& CurrentPhysicalCore() { -        const auto index = cpu_manager.GetActiveCoreIndex(); -        return kernel.PhysicalCore(index); -    } +        kernel.Suspend(false); +       
 core_timing.SyncPause(false); +        cpu_manager.Pause(false); -    Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) { -        return kernel.PhysicalCore(index); +        return status;      } -    ResultStatus RunLoop(bool tight_loop) { +    ResultStatus Pause() {          status = ResultStatus::Success; -        cpu_manager.RunLoop(tight_loop); +        core_timing.SyncPause(true); +        kernel.Suspend(true); +        cpu_manager.Pause(true);          return status;      } @@ -143,7 +147,15 @@ struct System::Impl {          device_memory = std::make_unique<Core::DeviceMemory>(system); -        core_timing.Initialize(); +        is_multicore = Settings::values.use_multi_core; +        is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation; + +        kernel.SetMulticore(is_multicore); +        cpu_manager.SetMulticore(is_multicore); +        cpu_manager.SetAsyncGpu(is_async_gpu); +        core_timing.SetMulticore(is_multicore); + +        core_timing.Initialize([&system]() { system.RegisterHostThread(); });          kernel.Initialize();          cpu_manager.Initialize(); @@ -180,6 +192,11 @@ struct System::Impl {          is_powered_on = true;          exit_lock = false; +        microprofile_dynarmic[0] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU0); +        microprofile_dynarmic[1] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU1); +        microprofile_dynarmic[2] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU2); +        microprofile_dynarmic[3] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU3); +          LOG_DEBUG(Core, "Initialized OK");          return ResultStatus::Success; @@ -277,8 +294,6 @@ struct System::Impl {          service_manager.reset();          cheat_engine.reset();          telemetry_session.reset(); -        perf_stats.reset(); -        gpu_core.reset();          device_memory.reset();          // Close all CPU/threading state @@ -290,6 +305,8 @@ struct System::Impl {          // Close app loader          
app_loader.reset(); +        gpu_core.reset(); +        perf_stats.reset();          // Clear all applets          applet_manager.ClearAll(); @@ -382,25 +399,35 @@ struct System::Impl {      std::unique_ptr<Core::PerfStats> perf_stats;      Core::FrameLimiter frame_limiter; + +    bool is_multicore{}; +    bool is_async_gpu{}; + +    std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{}; +    std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_dynarmic{};  };  System::System() : impl{std::make_unique<Impl>(*this)} {}  System::~System() = default; -CoreManager& System::CurrentCoreManager() { -    return impl->CurrentCoreManager(); +CpuManager& System::GetCpuManager() { +    return impl->cpu_manager; +} + +const CpuManager& System::GetCpuManager() const { +    return impl->cpu_manager;  } -const CoreManager& System::CurrentCoreManager() const { -    return impl->CurrentCoreManager(); +System::ResultStatus System::Run() { +    return impl->Run();  } -System::ResultStatus System::RunLoop(bool tight_loop) { -    return impl->RunLoop(tight_loop); +System::ResultStatus System::Pause() { +    return impl->Pause();  }  System::ResultStatus System::SingleStep() { -    return RunLoop(false); +    return ResultStatus::Success;  }  void System::InvalidateCpuInstructionCaches() { @@ -416,7 +443,7 @@ bool System::IsPoweredOn() const {  }  void System::PrepareReschedule() { -    impl->CurrentPhysicalCore().Stop(); +    // Deprecated, does nothing, kept for backward compatibility.  
}  void System::PrepareReschedule(const u32 core_index) { @@ -436,31 +463,41 @@ const TelemetrySession& System::TelemetrySession() const {  }  ARM_Interface& System::CurrentArmInterface() { -    return impl->CurrentPhysicalCore().ArmInterface(); +    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();  }  const ARM_Interface& System::CurrentArmInterface() const { -    return impl->CurrentPhysicalCore().ArmInterface(); +    return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();  }  std::size_t System::CurrentCoreIndex() const { -    return impl->cpu_manager.GetActiveCoreIndex(); +    std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    ASSERT(core < Core::Hardware::NUM_CPU_CORES); +    return core;  }  Kernel::Scheduler& System::CurrentScheduler() { -    return impl->CurrentPhysicalCore().Scheduler(); +    return impl->kernel.CurrentScheduler();  }  const Kernel::Scheduler& System::CurrentScheduler() const { -    return impl->CurrentPhysicalCore().Scheduler(); +    return impl->kernel.CurrentScheduler(); +} + +Kernel::PhysicalCore& System::CurrentPhysicalCore() { +    return impl->kernel.CurrentPhysicalCore(); +} + +const Kernel::PhysicalCore& System::CurrentPhysicalCore() const { +    return impl->kernel.CurrentPhysicalCore();  }  Kernel::Scheduler& System::Scheduler(std::size_t core_index) { -    return impl->GetPhysicalCore(core_index).Scheduler(); +    return impl->kernel.Scheduler(core_index);  }  const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { -    return impl->GetPhysicalCore(core_index).Scheduler(); +    return impl->kernel.Scheduler(core_index);  }  /// Gets the global scheduler @@ -490,20 +527,15 @@ const Kernel::Process* System::CurrentProcess() const {  }  ARM_Interface& System::ArmInterface(std::size_t core_index) { -    return impl->GetPhysicalCore(core_index).ArmInterface(); +    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread(); +    ASSERT(thread && 
!thread->IsHLEThread()); +    return thread->ArmInterface();  }  const ARM_Interface& System::ArmInterface(std::size_t core_index) const { -    return impl->GetPhysicalCore(core_index).ArmInterface(); -} - -CoreManager& System::GetCoreManager(std::size_t core_index) { -    return impl->cpu_manager.GetCoreManager(core_index); -} - -const CoreManager& System::GetCoreManager(std::size_t core_index) const { -    ASSERT(core_index < NUM_CPU_CORES); -    return impl->cpu_manager.GetCoreManager(core_index); +    auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread(); +    ASSERT(thread && !thread->IsHLEThread()); +    return thread->ArmInterface();  }  ExclusiveMonitor& System::Monitor() { @@ -722,4 +754,18 @@ void System::RegisterHostThread() {      impl->kernel.RegisterHostThread();  } +void System::EnterDynarmicProfile() { +    std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    impl->dynarmic_ticks[core] = MicroProfileEnter(impl->microprofile_dynarmic[core]); +} + +void System::ExitDynarmicProfile() { +    std::size_t core = impl->kernel.GetCurrentHostThreadID(); +    MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]); +} + +bool System::IsMulticore() const { +    return impl->is_multicore; +} +  } // namespace Core diff --git a/src/core/core.h b/src/core/core.h index acc53d6a1..5c6cfbffe 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -27,6 +27,7 @@ class VfsFilesystem;  namespace Kernel {  class GlobalScheduler;  class KernelCore; +class PhysicalCore;  class Process;  class Scheduler;  } // namespace Kernel @@ -90,7 +91,7 @@ class InterruptManager;  namespace Core {  class ARM_Interface; -class CoreManager; +class CpuManager;  class DeviceMemory;  class ExclusiveMonitor;  class FrameLimiter; @@ -136,16 +137,16 @@ public:      };      /** -     * Run the core CPU loop -     * This function runs the core for the specified number of CPU instructions before trying to -     * update hardware. 
This is much faster than SingleStep (and should be equivalent), as the CPU -     * is not required to do a full dispatch with each instruction. NOTE: the number of instructions -     * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware -     * update is requested (e.g. on a thread switch). -     * @param tight_loop If false, the CPU single-steps. -     * @return Result status, indicating whether or not the operation succeeded. +     * Run the OS and Application +     * This function will start emulation and run the relevant devices +     */ +    ResultStatus Run(); + +    /** +     * Pause the OS and Application +     * This function will pause emulation and stop the relevant devices       */ -    ResultStatus RunLoop(bool tight_loop = true); +    ResultStatus Pause();      /**       * Step the CPU one instruction @@ -209,17 +210,21 @@ public:      /// Gets the scheduler for the CPU core that is currently running      const Kernel::Scheduler& CurrentScheduler() const; +    /// Gets the physical core for the CPU core that is currently running +    Kernel::PhysicalCore& CurrentPhysicalCore(); + +    /// Gets the physical core for the CPU core that is currently running +    const Kernel::PhysicalCore& CurrentPhysicalCore() const; +      /// Gets a reference to an ARM interface for the CPU core with the specified index      ARM_Interface& ArmInterface(std::size_t core_index);      /// Gets a const reference to an ARM interface from the CPU core with the specified index      const ARM_Interface& ArmInterface(std::size_t core_index) const; -    /// Gets a CPU interface to the CPU core with the specified index -    CoreManager& GetCoreManager(std::size_t core_index); +    CpuManager& GetCpuManager(); -    /// Gets a CPU interface to the CPU core with the specified index -    const CoreManager& GetCoreManager(std::size_t core_index) const; +    const CpuManager& GetCpuManager() const;      /// Gets a reference to the exclusive 
monitor      ExclusiveMonitor& Monitor(); @@ -370,14 +375,17 @@ public:      /// Register a host thread as an auxiliary thread.      void RegisterHostThread(); -private: -    System(); +    /// Enter Dynarmic Microprofile +    void EnterDynarmicProfile(); + +    /// Exit Dynarmic Microprofile +    void ExitDynarmicProfile(); -    /// Returns the currently running CPU core -    CoreManager& CurrentCoreManager(); +    /// Tells if system is running on multicore. +    bool IsMulticore() const; -    /// Returns the currently running CPU core -    const CoreManager& CurrentCoreManager() const; +private: +    System();      /**       * Initialize the emulated system. diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp deleted file mode 100644 index b6b797c80..000000000 --- a/src/core/core_manager.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <condition_variable> -#include <mutex> - -#include "common/logging/log.h" -#include "core/arm/exclusive_monitor.h" -#include "core/arm/unicorn/arm_unicorn.h" -#include "core/core.h" -#include "core/core_manager.h" -#include "core/core_timing.h" -#include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/physical_core.h" -#include "core/hle/kernel/scheduler.h" -#include "core/hle/kernel/thread.h" -#include "core/hle/lock.h" -#include "core/settings.h" - -namespace Core { - -CoreManager::CoreManager(System& system, std::size_t core_index) -    : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore( -                                                      core_index)}, -      core_timing{system.CoreTiming()}, core_index{core_index} {} - -CoreManager::~CoreManager() = default; - -void CoreManager::RunLoop(bool tight_loop) { -    Reschedule(); - -    // If we don't have a currently active thread then don't execute instructions, -    // instead advance to the 
next event and try to yield to the next thread -    if (Kernel::GetCurrentThread() == nullptr) { -        LOG_TRACE(Core, "Core-{} idling", core_index); -        core_timing.Idle(); -    } else { -        if (tight_loop) { -            physical_core.Run(); -        } else { -            physical_core.Step(); -        } -    } -    core_timing.Advance(); - -    Reschedule(); -} - -void CoreManager::SingleStep() { -    return RunLoop(false); -} - -void CoreManager::PrepareReschedule() { -    physical_core.Stop(); -} - -void CoreManager::Reschedule() { -    // Lock the global kernel mutex when we manipulate the HLE state -    std::lock_guard lock(HLE::g_hle_lock); - -    global_scheduler.SelectThread(core_index); - -    physical_core.Scheduler().TryDoContextSwitch(); -} - -} // namespace Core diff --git a/src/core/core_manager.h b/src/core/core_manager.h deleted file mode 100644 index d525de00a..000000000 --- a/src/core/core_manager.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 yuzu emulator team -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include <atomic> -#include <cstddef> -#include <memory> -#include "common/common_types.h" - -namespace Kernel { -class GlobalScheduler; -class PhysicalCore; -} // namespace Kernel - -namespace Core { -class System; -} - -namespace Core::Timing { -class CoreTiming; -} - -namespace Core::Memory { -class Memory; -} - -namespace Core { - -constexpr unsigned NUM_CPU_CORES{4}; - -class CoreManager { -public: -    CoreManager(System& system, std::size_t core_index); -    ~CoreManager(); - -    void RunLoop(bool tight_loop = true); - -    void SingleStep(); - -    void PrepareReschedule(); - -    bool IsMainCore() const { -        return core_index == 0; -    } - -    std::size_t CoreIndex() const { -        return core_index; -    } - -private: -    void Reschedule(); - -    Kernel::GlobalScheduler& global_scheduler; -    Kernel::PhysicalCore& physical_core; -    Timing::CoreTiming& core_timing; - -    std::atomic<bool> reschedule_pending = false; -    std::size_t core_index; -}; - -} // namespace Core diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 46d4178c4..5c83c41a4 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -1,29 +1,27 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
-#include "core/core_timing.h" -  #include <algorithm>  #include <mutex>  #include <string>  #include <tuple>  #include "common/assert.h" -#include "common/thread.h" +#include "common/microprofile.h" +#include "core/core_timing.h"  #include "core/core_timing_util.h" -#include "core/hardware_properties.h"  namespace Core::Timing { -constexpr int MAX_SLICE_LENGTH = 10000; +constexpr u64 MAX_SLICE_LENGTH = 4000;  std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {      return std::make_shared<EventType>(std::move(callback), std::move(name));  }  struct CoreTiming::Event { -    s64 time; +    u64 time;      u64 fifo_order;      u64 userdata;      std::weak_ptr<EventType> type; @@ -39,51 +37,90 @@ struct CoreTiming::Event {      }  }; -CoreTiming::CoreTiming() = default; -CoreTiming::~CoreTiming() = default; +CoreTiming::CoreTiming() { +    clock = +        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ); +} -void CoreTiming::Initialize() { -    downcounts.fill(MAX_SLICE_LENGTH); -    time_slice.fill(MAX_SLICE_LENGTH); -    slice_length = MAX_SLICE_LENGTH; -    global_timer = 0; -    idled_cycles = 0; -    current_context = 0; +CoreTiming::~CoreTiming() = default; -    // The time between CoreTiming being initialized and the first call to Advance() is considered -    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before -    // executing the first cycle of each slice to prepare the slice length and downcount for -    // that slice. 
-    is_global_timer_sane = true; +void CoreTiming::ThreadEntry(CoreTiming& instance) { +    constexpr char name[] = "yuzu:HostTiming"; +    MicroProfileOnThreadCreate(name); +    Common::SetCurrentThreadName(name); +    Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh); +    instance.on_thread_init(); +    instance.ThreadLoop(); +} +void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) { +    on_thread_init = std::move(on_thread_init_);      event_fifo_id = 0; - +    shutting_down = false; +    ticks = 0;      const auto empty_timed_callback = [](u64, s64) {};      ev_lost = CreateEvent("_lost_event", empty_timed_callback); +    if (is_multicore) { +        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); +    }  }  void CoreTiming::Shutdown() { +    paused = true; +    shutting_down = true; +    pause_event.Set(); +    event.Set(); +    if (timer_thread) { +        timer_thread->join(); +    }      ClearPendingEvents(); +    timer_thread.reset(); +    has_started = false;  } -void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type, -                               u64 userdata) { -    std::lock_guard guard{inner_mutex}; -    const s64 timeout = GetTicks() + cycles_into_future; +void CoreTiming::Pause(bool is_paused) { +    paused = is_paused; +    pause_event.Set(); +} -    // If this event needs to be scheduled before the next advance(), force one early -    if (!is_global_timer_sane) { -        ForceExceptionCheck(cycles_into_future); +void CoreTiming::SyncPause(bool is_paused) { +    if (is_paused == paused && paused_set == paused) { +        return; +    } +    Pause(is_paused); +    if (timer_thread) { +        if (!is_paused) { +            pause_event.Set(); +        } +        event.Set(); +        while (paused_set != is_paused) +            ;      } +} -    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); +bool 
CoreTiming::IsRunning() const { +    return !paused_set; +} -    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); +bool CoreTiming::HasPendingEvents() const { +    return !(wait_set && event_queue.empty());  } -void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) { -    std::lock_guard guard{inner_mutex}; +void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type, +                               u64 userdata) { +    { +        std::scoped_lock scope{basic_lock}; +        const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future); + +        event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); +        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); +    } +    event.Set(); +} + +void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) { +    std::scoped_lock scope{basic_lock};      const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {          return e.type.lock().get() == event_type.get() && e.userdata == userdata;      }); @@ -95,21 +132,39 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u      }  } -u64 CoreTiming::GetTicks() const { -    u64 ticks = static_cast<u64>(global_timer); -    if (!is_global_timer_sane) { -        ticks += accumulated_ticks; +void CoreTiming::AddTicks(u64 ticks) { +    this->ticks += ticks; +    downcount -= ticks; +} + +void CoreTiming::Idle() { +    if (!event_queue.empty()) { +        const u64 next_event_time = event_queue.front().time; +        const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; +        if (next_ticks > ticks) { +            ticks = next_ticks; +        } +        return;      } -    return ticks; +    ticks += 1000U;  } -u64 CoreTiming::GetIdleTicks() const { -    return 
static_cast<u64>(idled_cycles); +void CoreTiming::ResetTicks() { +    downcount = MAX_SLICE_LENGTH;  } -void CoreTiming::AddTicks(u64 ticks) { -    accumulated_ticks += ticks; -    downcounts[current_context] -= static_cast<s64>(ticks); +u64 CoreTiming::GetCPUTicks() const { +    if (is_multicore) { +        return clock->GetCPUCycles(); +    } +    return ticks; +} + +u64 CoreTiming::GetClockTicks() const { +    if (is_multicore) { +        return clock->GetClockCycles(); +    } +    return CpuCyclesToClockCycles(ticks);  }  void CoreTiming::ClearPendingEvents() { @@ -117,7 +172,7 @@ void CoreTiming::ClearPendingEvents() {  }  void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) { -    std::lock_guard guard{inner_mutex}; +    basic_lock.lock();      const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {          return e.type.lock().get() == event_type.get(); @@ -128,99 +183,72 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {          event_queue.erase(itr, event_queue.end());          std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());      } +    basic_lock.unlock();  } -void CoreTiming::ForceExceptionCheck(s64 cycles) { -    cycles = std::max<s64>(0, cycles); -    if (downcounts[current_context] <= cycles) { -        return; -    } - -    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int -    // here. 
Account for cycles already executed by adjusting the g.slice_length -    downcounts[current_context] = static_cast<int>(cycles); -} - -std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const { -    const u64 original_context = current_context; -    u64 next_context = (original_context + 1) % num_cpu_cores; -    while (next_context != original_context) { -        if (time_slice[next_context] >= needed_ticks) { -            return {next_context}; -        } else if (time_slice[next_context] >= 0) { -            return std::nullopt; -        } -        next_context = (next_context + 1) % num_cpu_cores; -    } -    return std::nullopt; -} - -void CoreTiming::Advance() { -    std::unique_lock<std::mutex> guard(inner_mutex); - -    const u64 cycles_executed = accumulated_ticks; -    time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks); -    global_timer += cycles_executed; - -    is_global_timer_sane = true; +std::optional<s64> CoreTiming::Advance() { +    std::scoped_lock advance_scope{advance_lock}; +    std::scoped_lock basic_scope{basic_lock}; +    global_timer = GetGlobalTimeNs().count();      while (!event_queue.empty() && event_queue.front().time <= global_timer) {          Event evt = std::move(event_queue.front());          std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());          event_queue.pop_back(); -        inner_mutex.unlock(); +        basic_lock.unlock();          if (auto event_type{evt.type.lock()}) {              event_type->callback(evt.userdata, global_timer - evt.time);          } -        inner_mutex.lock(); +        basic_lock.lock(); +        global_timer = GetGlobalTimeNs().count();      } -    is_global_timer_sane = false; - -    // Still events left (scheduled in the future)      if (!event_queue.empty()) { -        const s64 needed_ticks = -            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); -        const auto 
next_core = NextAvailableCore(needed_ticks); -        if (next_core) { -            downcounts[*next_core] = needed_ticks; -        } +        const s64 next_time = event_queue.front().time - global_timer; +        return next_time; +    } else { +        return std::nullopt;      } - -    accumulated_ticks = 0; - -    downcounts[current_context] = time_slice[current_context];  } -void CoreTiming::ResetRun() { -    downcounts.fill(MAX_SLICE_LENGTH); -    time_slice.fill(MAX_SLICE_LENGTH); -    current_context = 0; -    // Still events left (scheduled in the future) -    if (!event_queue.empty()) { -        const s64 needed_ticks = -            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH); -        downcounts[current_context] = needed_ticks; +void CoreTiming::ThreadLoop() { +    has_started = true; +    while (!shutting_down) { +        while (!paused) { +            paused_set = false; +            const auto next_time = Advance(); +            if (next_time) { +                if (*next_time > 0) { +                    std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time); +                    event.WaitFor(next_time_ns); +                } +            } else { +                wait_set = true; +                event.Wait(); +            } +            wait_set = false; +        } +        paused_set = true; +        clock->Pause(true); +        pause_event.Wait(); +        clock->Pause(false);      } - -    is_global_timer_sane = false; -    accumulated_ticks = 0;  } -void CoreTiming::Idle() { -    accumulated_ticks += downcounts[current_context]; -    idled_cycles += downcounts[current_context]; -    downcounts[current_context] = 0; +std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { +    if (is_multicore) { +        return clock->GetTimeNS(); +    } +    return CyclesToNs(ticks);  }  std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { -    return std::chrono::microseconds{GetTicks() * 
1000000 / Hardware::BASE_CLOCK_RATE}; -} - -s64 CoreTiming::GetDowncount() const { -    return downcounts[current_context]; +    if (is_multicore) { +        return clock->GetTimeUS(); +    } +    return CyclesToUs(ticks);  }  } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index d50f4eb8a..72faaab64 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -1,19 +1,25 @@ -// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project -// Licensed under GPLv2+ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version  // Refer to the license.txt file included.  #pragma once +#include <atomic>  #include <chrono>  #include <functional>  #include <memory>  #include <mutex>  #include <optional>  #include <string> +#include <thread>  #include <vector>  #include "common/common_types.h" +#include "common/spin_lock.h" +#include "common/thread.h"  #include "common/threadsafe_queue.h" +#include "common/wall_clock.h" +#include "core/hardware_properties.h"  namespace Core::Timing { @@ -56,16 +62,40 @@ public:      /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is      /// required to end slice - 1 and start slice 0 before the first cycle of code is executed. -    void Initialize(); +    void Initialize(std::function<void(void)>&& on_thread_init_);      /// Tears down all timing related functionality.      void Shutdown(); -    /// After the first Advance, the slice lengths and the downcount will be reduced whenever an -    /// event is scheduled earlier than the current values. -    /// -    /// Scheduling from a callback will not update the downcount until the Advance() completes. 
-    void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type, +    /// Sets if emulation is multicore or single core, must be set before Initialize +    void SetMulticore(bool is_multicore) { +        this->is_multicore = is_multicore; +    } + +    /// Check if it's using host timing. +    bool IsHostTiming() const { +        return is_multicore; +    } + +    /// Pauses/Unpauses the execution of the timer thread. +    void Pause(bool is_paused); + +    /// Pauses/Unpauses the execution of the timer thread and waits until paused. +    void SyncPause(bool is_paused); + +    /// Checks if core timing is running. +    bool IsRunning() const; + +    /// Checks if the timer thread has started. +    bool HasStarted() const { +        return has_started; +    } + +    /// Checks if there are any pending time events. +    bool HasPendingEvents() const; + +    /// Schedules an event in core timing +    void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,                         u64 userdata = 0);      void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata); @@ -73,41 +103,30 @@ public:      /// We only permit one event of each type in the queue at a time.      void RemoveEvent(const std::shared_ptr<EventType>& event_type); -    void ForceExceptionCheck(s64 cycles); - -    /// This should only be called from the emu thread, if you are calling it any other thread, -    /// you are doing something evil -    u64 GetTicks() const; - -    u64 GetIdleTicks() const; -      void AddTicks(u64 ticks); -    /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends -    /// the previous timing slice and begins the next one, you must Advance from the previous -    /// slice to the current one before executing any cycles. 
CoreTiming starts in slice -1 so an -    /// Advance() is required to initialize the slice length before the first cycle of emulated -    /// instructions is executed. -    void Advance(); +    void ResetTicks(); -    /// Pretend that the main CPU has executed enough cycles to reach the next event.      void Idle(); -    std::chrono::microseconds GetGlobalTimeUs() const; +    s64 GetDowncount() const { +        return downcount; +    } -    void ResetRun(); +    /// Returns current time in emulated CPU cycles +    u64 GetCPUTicks() const; -    s64 GetDowncount() const; +    /// Returns current time in emulated in Clock cycles +    u64 GetClockTicks() const; -    void SwitchContext(u64 new_context) { -        current_context = new_context; -    } +    /// Returns current time in microseconds. +    std::chrono::microseconds GetGlobalTimeUs() const; -    bool CanCurrentContextRun() const { -        return time_slice[current_context] > 0; -    } +    /// Returns current time in nanoseconds. +    std::chrono::nanoseconds GetGlobalTimeNs() const; -    std::optional<u64> NextAvailableCore(const s64 needed_ticks) const; +    /// Checks for events manually and returns time in nanoseconds for next event, threadsafe. +    std::optional<s64> Advance();  private:      struct Event; @@ -115,21 +134,14 @@ private:      /// Clear all pending events. This should ONLY be done on exit.      void ClearPendingEvents(); -    static constexpr u64 num_cpu_cores = 4; +    static void ThreadEntry(CoreTiming& instance); +    void ThreadLoop(); -    s64 global_timer = 0; -    s64 idled_cycles = 0; -    s64 slice_length = 0; -    u64 accumulated_ticks = 0; -    std::array<s64, num_cpu_cores> downcounts{}; -    // Slice of time assigned to each core per run. 
-    std::array<s64, num_cpu_cores> time_slice{}; -    u64 current_context = 0; +    std::unique_ptr<Common::WallClock> clock; -    // Are we in a function that has been called from Advance() -    // If events are scheduled from a function that gets called from Advance(), -    // don't change slice_length and downcount. -    bool is_global_timer_sane = false; +    u64 global_timer = 0; + +    std::chrono::nanoseconds start_point;      // The queue is a min-heap using std::make_heap/push_heap/pop_heap.      // We don't use std::priority_queue because we need to be able to serialize, unserialize and @@ -139,8 +151,23 @@ private:      u64 event_fifo_id = 0;      std::shared_ptr<EventType> ev_lost; - -    std::mutex inner_mutex; +    Common::Event event{}; +    Common::Event pause_event{}; +    Common::SpinLock basic_lock{}; +    Common::SpinLock advance_lock{}; +    std::unique_ptr<std::thread> timer_thread; +    std::atomic<bool> paused{}; +    std::atomic<bool> paused_set{}; +    std::atomic<bool> wait_set{}; +    std::atomic<bool> shutting_down{}; +    std::atomic<bool> has_started{}; +    std::function<void(void)> on_thread_init{}; + +    bool is_multicore{}; + +    /// Cycle timing +    u64 ticks{}; +    s64 downcount{};  };  /// Creates a core timing event with the given name and callback. 
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index de50d3b14..aefc63663 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp @@ -38,15 +38,23 @@ s64 usToCycles(std::chrono::microseconds us) {  }  s64 nsToCycles(std::chrono::nanoseconds ns) { -    if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) { -        LOG_ERROR(Core_Timing, "Integer overflow, use max value"); -        return std::numeric_limits<s64>::max(); -    } -    if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) { -        LOG_DEBUG(Core_Timing, "Time very big, do rounding"); -        return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000); -    } -    return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; +    const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE); +    return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first; +} + +u64 msToClockCycles(std::chrono::milliseconds ns) { +    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); +    return Common::Divide128On32(temp, 1000).first; +} + +u64 usToClockCycles(std::chrono::microseconds ns) { +    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); +    return Common::Divide128On32(temp, 1000000).first; +} + +u64 nsToClockCycles(std::chrono::nanoseconds ns) { +    const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ); +    return Common::Divide128On32(temp, 1000000000).first;  }  u64 CpuCyclesToClockCycles(u64 ticks) { @@ -54,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {      return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;  } +std::chrono::milliseconds CyclesToMs(s64 cycles) { +    const u128 temporal = Common::Multiply64Into128(cycles, 1000); +    u64 ms = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; +    return std::chrono::milliseconds(ms); +} + 
+std::chrono::nanoseconds CyclesToNs(s64 cycles) { +    const u128 temporal = Common::Multiply64Into128(cycles, 1000000000); +    u64 ns = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; +    return std::chrono::nanoseconds(ns); +} + +std::chrono::microseconds CyclesToUs(s64 cycles) { +    const u128 temporal = Common::Multiply64Into128(cycles, 1000000); +    u64 us = Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; +    return std::chrono::microseconds(us); +} +  } // namespace Core::Timing diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index addc72b19..2ed979e14 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h @@ -13,18 +13,12 @@ namespace Core::Timing {  s64 msToCycles(std::chrono::milliseconds ms);  s64 usToCycles(std::chrono::microseconds us);  s64 nsToCycles(std::chrono::nanoseconds ns); - -inline std::chrono::milliseconds CyclesToMs(s64 cycles) { -    return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::nanoseconds CyclesToNs(s64 cycles) { -    return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE); -} - -inline std::chrono::microseconds CyclesToUs(s64 cycles) { -    return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE); -} +u64 msToClockCycles(std::chrono::milliseconds ns); +u64 usToClockCycles(std::chrono::microseconds ns); +u64 nsToClockCycles(std::chrono::nanoseconds ns); +std::chrono::milliseconds CyclesToMs(s64 cycles); +std::chrono::nanoseconds CyclesToNs(s64 cycles); +std::chrono::microseconds CyclesToUs(s64 cycles);  u64 CpuCyclesToClockCycles(u64 ticks); diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp index 70ddbdcca..32afcf3ae 100644 --- a/src/core/cpu_manager.cpp +++ b/src/core/cpu_manager.cpp @@ -2,80 +2,372 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
+#include "common/fiber.h" +#include "common/microprofile.h" +#include "common/thread.h"  #include "core/arm/exclusive_monitor.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/core_timing.h"  #include "core/cpu_manager.h"  #include "core/gdbstub/gdbstub.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h" +#include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/thread.h" +#include "video_core/gpu.h"  namespace Core {  CpuManager::CpuManager(System& system) : system{system} {}  CpuManager::~CpuManager() = default; +void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { +    cpu_manager.RunThread(core); +} +  void CpuManager::Initialize() { -    for (std::size_t index = 0; index < core_managers.size(); ++index) { -        core_managers[index] = std::make_unique<CoreManager>(system, index); +    running_mode = true; +    if (is_multicore) { +        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +            core_data[core].host_thread = +                std::make_unique<std::thread>(ThreadStart, std::ref(*this), core); +        } +    } else { +        core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0);      }  }  void CpuManager::Shutdown() { -    for (auto& cpu_core : core_managers) { -        cpu_core.reset(); +    running_mode = false; +    Pause(false); +    if (is_multicore) { +        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +            core_data[core].host_thread->join(); +            core_data[core].host_thread.reset(); +        } +    } else { +        core_data[0].host_thread->join(); +        core_data[0].host_thread.reset();      }  } -CoreManager& CpuManager::GetCoreManager(std::size_t index) { -    return *core_managers.at(index); +std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() { +    return std::function<void(void*)>(GuestThreadFunction);  } -const 
CoreManager& CpuManager::GetCoreManager(std::size_t index) const { -    return *core_managers.at(index); +std::function<void(void*)> CpuManager::GetIdleThreadStartFunc() { +    return std::function<void(void*)>(IdleThreadFunction);  } -CoreManager& CpuManager::GetCurrentCoreManager() { -    // Otherwise, use single-threaded mode active_core variable -    return *core_managers[active_core]; +std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() { +    return std::function<void(void*)>(SuspendThreadFunction);  } -const CoreManager& CpuManager::GetCurrentCoreManager() const { -    // Otherwise, use single-threaded mode active_core variable -    return *core_managers[active_core]; +void CpuManager::GuestThreadFunction(void* cpu_manager_) { +    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); +    if (cpu_manager->is_multicore) { +        cpu_manager->MultiCoreRunGuestThread(); +    } else { +        cpu_manager->SingleCoreRunGuestThread(); +    }  } -void CpuManager::RunLoop(bool tight_loop) { -    if (GDBStub::IsServerEnabled()) { -        GDBStub::HandlePacket(); +void CpuManager::GuestRewindFunction(void* cpu_manager_) { +    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); +    if (cpu_manager->is_multicore) { +        cpu_manager->MultiCoreRunGuestLoop(); +    } else { +        cpu_manager->SingleCoreRunGuestLoop(); +    } +} -        // If the loop is halted and we want to step, use a tiny (1) number of instructions to -        // execute. Otherwise, get out of the loop function. 
-        if (GDBStub::GetCpuHaltFlag()) { -            if (GDBStub::GetCpuStepFlag()) { -                tight_loop = false; -            } else { -                return; +void CpuManager::IdleThreadFunction(void* cpu_manager_) { +    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); +    if (cpu_manager->is_multicore) { +        cpu_manager->MultiCoreRunIdleThread(); +    } else { +        cpu_manager->SingleCoreRunIdleThread(); +    } +} + +void CpuManager::SuspendThreadFunction(void* cpu_manager_) { +    CpuManager* cpu_manager = static_cast<CpuManager*>(cpu_manager_); +    if (cpu_manager->is_multicore) { +        cpu_manager->MultiCoreRunSuspendThread(); +    } else { +        cpu_manager->SingleCoreRunSuspendThread(); +    } +} + +void* CpuManager::GetStartFuncParamater() { +    return static_cast<void*>(this); +} + +/////////////////////////////////////////////////////////////////////////////// +///                             MultiCore                                   /// +/////////////////////////////////////////////////////////////////////////////// + +void CpuManager::MultiCoreRunGuestThread() { +    auto& kernel = system.Kernel(); +    { +        auto& sched = kernel.CurrentScheduler(); +        sched.OnThreadStart(); +    } +    MultiCoreRunGuestLoop(); +} + +void CpuManager::MultiCoreRunGuestLoop() { +    auto& kernel = system.Kernel(); +    auto* thread = kernel.CurrentScheduler().GetCurrentThread(); +    while (true) { +        auto* physical_core = &kernel.CurrentPhysicalCore(); +        auto& arm_interface = thread->ArmInterface(); +        system.EnterDynarmicProfile(); +        while (!physical_core->IsInterrupted()) { +            arm_interface.Run(); +            physical_core = &kernel.CurrentPhysicalCore(); +        } +        system.ExitDynarmicProfile(); +        arm_interface.ClearExclusiveState(); +        auto& scheduler = kernel.CurrentScheduler(); +        scheduler.TryDoContextSwitch(); +    } +} + +void 
CpuManager::MultiCoreRunIdleThread() { +    auto& kernel = system.Kernel(); +    while (true) { +        auto& physical_core = kernel.CurrentPhysicalCore(); +        physical_core.Idle(); +        auto& scheduler = kernel.CurrentScheduler(); +        scheduler.TryDoContextSwitch(); +    } +} + +void CpuManager::MultiCoreRunSuspendThread() { +    auto& kernel = system.Kernel(); +    { +        auto& sched = kernel.CurrentScheduler(); +        sched.OnThreadStart(); +    } +    while (true) { +        auto core = kernel.GetCurrentHostThreadID(); +        auto& scheduler = kernel.CurrentScheduler(); +        Kernel::Thread* current_thread = scheduler.GetCurrentThread(); +        Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[core].host_context); +        ASSERT(scheduler.ContextSwitchPending()); +        ASSERT(core == kernel.GetCurrentHostThreadID()); +        scheduler.TryDoContextSwitch(); +    } +} + +void CpuManager::MultiCorePause(bool paused) { +    if (!paused) { +        bool all_not_barrier = false; +        while (!all_not_barrier) { +            all_not_barrier = true; +            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +                all_not_barrier &= +                    !core_data[core].is_running.load() && core_data[core].initialized.load(); +            } +        } +        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +            core_data[core].enter_barrier->Set(); +        } +        if (paused_state.load()) { +            bool all_barrier = false; +            while (!all_barrier) { +                all_barrier = true; +                for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +                    all_barrier &= +                        core_data[core].is_paused.load() && core_data[core].initialized.load(); +                } +            } +            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) 
{ +                core_data[core].exit_barrier->Set(); +            } +        } +    } else { +        /// Wait until all cores are paused. +        bool all_barrier = false; +        while (!all_barrier) { +            all_barrier = true; +            for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +                all_barrier &= +                    core_data[core].is_paused.load() && core_data[core].initialized.load();              }          } +        /// Don't release the barrier      } +    paused_state = paused; +} + +/////////////////////////////////////////////////////////////////////////////// +///                             SingleCore                                   /// +/////////////////////////////////////////////////////////////////////////////// -    auto& core_timing = system.CoreTiming(); -    core_timing.ResetRun(); -    bool keep_running{}; -    do { -        keep_running = false; -        for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) { -            core_timing.SwitchContext(active_core); -            if (core_timing.CanCurrentContextRun()) { -                core_managers[active_core]->RunLoop(tight_loop); +void CpuManager::SingleCoreRunGuestThread() { +    auto& kernel = system.Kernel(); +    { +        auto& sched = kernel.CurrentScheduler(); +        sched.OnThreadStart(); +    } +    SingleCoreRunGuestLoop(); +} + +void CpuManager::SingleCoreRunGuestLoop() { +    auto& kernel = system.Kernel(); +    auto* thread = kernel.CurrentScheduler().GetCurrentThread(); +    while (true) { +        auto* physical_core = &kernel.CurrentPhysicalCore(); +        auto& arm_interface = thread->ArmInterface(); +        system.EnterDynarmicProfile(); +        if (!physical_core->IsInterrupted()) { +            arm_interface.Run(); +            physical_core = &kernel.CurrentPhysicalCore(); +        } +        system.ExitDynarmicProfile(); +        thread->SetPhantomMode(true); +        
system.CoreTiming().Advance(); +        thread->SetPhantomMode(false); +        arm_interface.ClearExclusiveState(); +        PreemptSingleCore(); +        auto& scheduler = kernel.Scheduler(current_core); +        scheduler.TryDoContextSwitch(); +    } +} + +void CpuManager::SingleCoreRunIdleThread() { +    auto& kernel = system.Kernel(); +    while (true) { +        auto& physical_core = kernel.CurrentPhysicalCore(); +        PreemptSingleCore(false); +        system.CoreTiming().AddTicks(1000U); +        idle_count++; +        auto& scheduler = physical_core.Scheduler(); +        scheduler.TryDoContextSwitch(); +    } +} + +void CpuManager::SingleCoreRunSuspendThread() { +    auto& kernel = system.Kernel(); +    { +        auto& sched = kernel.CurrentScheduler(); +        sched.OnThreadStart(); +    } +    while (true) { +        auto core = kernel.GetCurrentHostThreadID(); +        auto& scheduler = kernel.CurrentScheduler(); +        Kernel::Thread* current_thread = scheduler.GetCurrentThread(); +        Common::Fiber::YieldTo(current_thread->GetHostContext(), core_data[0].host_context); +        ASSERT(scheduler.ContextSwitchPending()); +        ASSERT(core == kernel.GetCurrentHostThreadID()); +        scheduler.TryDoContextSwitch(); +    } +} + +void CpuManager::PreemptSingleCore(bool from_running_enviroment) { +    std::size_t old_core = current_core; +    auto& scheduler = system.Kernel().Scheduler(old_core); +    Kernel::Thread* current_thread = scheduler.GetCurrentThread(); +    if (idle_count >= 4 || from_running_enviroment) { +        if (!from_running_enviroment) { +            system.CoreTiming().Idle(); +            idle_count = 0; +        } +        current_thread->SetPhantomMode(true); +        system.CoreTiming().Advance(); +        current_thread->SetPhantomMode(false); +    } +    current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES); +    system.CoreTiming().ResetTicks(); +    scheduler.Unload(); +    auto& next_scheduler = 
system.Kernel().Scheduler(current_core); +    Common::Fiber::YieldTo(current_thread->GetHostContext(), next_scheduler.ControlContext()); +    /// May have changed scheduler +    auto& current_scheduler = system.Kernel().Scheduler(current_core); +    current_scheduler.Reload(); +    auto* currrent_thread2 = current_scheduler.GetCurrentThread(); +    if (!currrent_thread2->IsIdleThread()) { +        idle_count = 0; +    } +} + +void CpuManager::SingleCorePause(bool paused) { +    if (!paused) { +        bool all_not_barrier = false; +        while (!all_not_barrier) { +            all_not_barrier = !core_data[0].is_running.load() && core_data[0].initialized.load(); +        } +        core_data[0].enter_barrier->Set(); +        if (paused_state.load()) { +            bool all_barrier = false; +            while (!all_barrier) { +                all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();              } -            keep_running |= core_timing.CanCurrentContextRun(); +            core_data[0].exit_barrier->Set();          } -    } while (keep_running); +    } else { +        /// Wait until all cores are paused. 
+        bool all_barrier = false; +        while (!all_barrier) { +            all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load(); +        } +        /// Don't release the barrier +    } +    paused_state = paused; +} + +void CpuManager::Pause(bool paused) { +    if (is_multicore) { +        MultiCorePause(paused); +    } else { +        SingleCorePause(paused); +    } +} -    if (GDBStub::IsServerEnabled()) { -        GDBStub::SetCpuStepFlag(false); +void CpuManager::RunThread(std::size_t core) { +    /// Initialization +    system.RegisterCoreThread(core); +    std::string name; +    if (is_multicore) { +        name = "yuzu:CoreCPUThread_" + std::to_string(core); +    } else { +        name = "yuzu:CPUThread"; +    } +    MicroProfileOnThreadCreate(name.c_str()); +    Common::SetCurrentThreadName(name.c_str()); +    Common::SetCurrentThreadPriority(Common::ThreadPriority::High); +    auto& data = core_data[core]; +    data.enter_barrier = std::make_unique<Common::Event>(); +    data.exit_barrier = std::make_unique<Common::Event>(); +    data.host_context = Common::Fiber::ThreadToFiber(); +    data.is_running = false; +    data.initialized = true; +    const bool sc_sync = !is_async_gpu && !is_multicore; +    bool sc_sync_first_use = sc_sync; +    /// Running +    while (running_mode) { +        data.is_running = false; +        data.enter_barrier->Wait(); +        if (sc_sync_first_use) { +            system.GPU().ObtainContext(); +            sc_sync_first_use = false; +        } +        auto& scheduler = system.Kernel().CurrentScheduler(); +        Kernel::Thread* current_thread = scheduler.GetCurrentThread(); +        data.is_running = true; +        Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext()); +        data.is_running = false; +        data.is_paused = true; +        data.exit_barrier->Wait(); +        data.is_paused = false;      } +    /// Time to cleanup +    data.host_context->Exit(); +    
data.enter_barrier.reset(); +    data.exit_barrier.reset(); +    data.initialized = false;  }  } // namespace Core diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h index 97554d1bb..35929ed94 100644 --- a/src/core/cpu_manager.h +++ b/src/core/cpu_manager.h @@ -5,12 +5,19 @@  #pragma once  #include <array> +#include <atomic> +#include <functional>  #include <memory> +#include <thread>  #include "core/hardware_properties.h" +namespace Common { +class Event; +class Fiber; +} // namespace Common +  namespace Core { -class CoreManager;  class System;  class CpuManager { @@ -24,24 +31,75 @@ public:      CpuManager& operator=(const CpuManager&) = delete;      CpuManager& operator=(CpuManager&&) = delete; +    /// Sets if emulation is multicore or single core, must be set before Initialize +    void SetMulticore(bool is_multicore) { +        this->is_multicore = is_multicore; +    } + +    /// Sets if emulation is using an asynchronous GPU. +    void SetAsyncGpu(bool is_async_gpu) { +        this->is_async_gpu = is_async_gpu; +    } +      void Initialize();      void Shutdown(); -    CoreManager& GetCoreManager(std::size_t index); -    const CoreManager& GetCoreManager(std::size_t index) const; +    void Pause(bool paused); -    CoreManager& GetCurrentCoreManager(); -    const CoreManager& GetCurrentCoreManager() const; +    std::function<void(void*)> GetGuestThreadStartFunc(); +    std::function<void(void*)> GetIdleThreadStartFunc(); +    std::function<void(void*)> GetSuspendThreadStartFunc(); +    void* GetStartFuncParamater(); -    std::size_t GetActiveCoreIndex() const { -        return active_core; -    } +    void PreemptSingleCore(bool from_running_enviroment = true); -    void RunLoop(bool tight_loop); +    std::size_t CurrentCore() const { +        return current_core.load(); +    }  private: -    std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers; -    std::size_t active_core{}; ///< Active core, only used in single thread 
mode +    static void GuestThreadFunction(void* cpu_manager); +    static void GuestRewindFunction(void* cpu_manager); +    static void IdleThreadFunction(void* cpu_manager); +    static void SuspendThreadFunction(void* cpu_manager); + +    void MultiCoreRunGuestThread(); +    void MultiCoreRunGuestLoop(); +    void MultiCoreRunIdleThread(); +    void MultiCoreRunSuspendThread(); +    void MultiCorePause(bool paused); + +    void SingleCoreRunGuestThread(); +    void SingleCoreRunGuestLoop(); +    void SingleCoreRunIdleThread(); +    void SingleCoreRunSuspendThread(); +    void SingleCorePause(bool paused); + +    static void ThreadStart(CpuManager& cpu_manager, std::size_t core); + +    void RunThread(std::size_t core); + +    struct CoreData { +        std::shared_ptr<Common::Fiber> host_context; +        std::unique_ptr<Common::Event> enter_barrier; +        std::unique_ptr<Common::Event> exit_barrier; +        std::atomic<bool> is_running; +        std::atomic<bool> is_paused; +        std::atomic<bool> initialized; +        std::unique_ptr<std::thread> host_thread; +    }; + +    std::atomic<bool> running_mode{}; +    std::atomic<bool> paused_state{}; + +    std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{}; + +    bool is_async_gpu{}; +    bool is_multicore{}; +    std::atomic<std::size_t> current_core{}; +    std::size_t preemption_count{}; +    std::size_t idle_count{}; +    static constexpr std::size_t max_cycle_runs = 5;      System& system;  }; diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h index 7265c4171..9269a73f2 100644 --- a/src/core/crypto/key_manager.h +++ b/src/core/crypto/key_manager.h @@ -223,7 +223,16 @@ bool operator<(const KeyIndex<KeyType>& lhs, const KeyIndex<KeyType>& rhs) {  class KeyManager {  public: -    KeyManager(); +    static KeyManager& Instance() { +        static KeyManager instance; +        return instance; +    } + +    KeyManager(const KeyManager&) = delete; +    KeyManager& 
operator=(const KeyManager&) = delete; + +    KeyManager(KeyManager&&) = delete; +    KeyManager& operator=(KeyManager&&) = delete;      bool HasKey(S128KeyType id, u64 field1 = 0, u64 field2 = 0) const;      bool HasKey(S256KeyType id, u64 field1 = 0, u64 field2 = 0) const; @@ -257,6 +266,8 @@ public:      bool AddTicketPersonalized(Ticket raw);  private: +    KeyManager(); +      std::map<KeyIndex<S128KeyType>, Key128> s128_keys;      std::map<KeyIndex<S256KeyType>, Key256> s256_keys; diff --git a/src/core/file_sys/bis_factory.cpp b/src/core/file_sys/bis_factory.cpp index 0af44f340..8935a62c3 100644 --- a/src/core/file_sys/bis_factory.cpp +++ b/src/core/file_sys/bis_factory.cpp @@ -79,7 +79,7 @@ VirtualDir BISFactory::OpenPartition(BisPartitionId id) const {  }  VirtualFile BISFactory::OpenPartitionStorage(BisPartitionId id) const { -    Core::Crypto::KeyManager keys; +    auto& keys = Core::Crypto::KeyManager::Instance();      Core::Crypto::PartitionDataManager pdm{          Core::System::GetInstance().GetFilesystem()->OpenDirectory(              FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir), Mode::Read)}; diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index 07d0c8d5d..664a47e7f 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -178,7 +178,7 @@ u32 XCI::GetSystemUpdateVersion() {          return 0;      for (const auto& file : update->GetFiles()) { -        NCA nca{file, nullptr, 0, keys}; +        NCA nca{file, nullptr, 0};          if (nca.GetStatus() != Loader::ResultStatus::Success)              continue; @@ -286,7 +286,7 @@ Loader::ResultStatus XCI::AddNCAFromPartition(XCIPartition part) {              continue;          } -        auto nca = std::make_shared<NCA>(file, nullptr, 0, keys); +        auto nca = std::make_shared<NCA>(file, nullptr, 0);          if (nca->IsUpdate()) {              continue;          } diff --git a/src/core/file_sys/card_image.h 
b/src/core/file_sys/card_image.h index c2ee0ea99..e1b136426 100644 --- a/src/core/file_sys/card_image.h +++ b/src/core/file_sys/card_image.h @@ -140,6 +140,6 @@ private:      u64 update_normal_partition_end; -    Core::Crypto::KeyManager keys; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();  };  } // namespace FileSys diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp index b8bbdd1ef..473245d5a 100644 --- a/src/core/file_sys/content_archive.cpp +++ b/src/core/file_sys/content_archive.cpp @@ -118,9 +118,8 @@ static bool IsValidNCA(const NCAHeader& header) {      return header.magic == Common::MakeMagic('N', 'C', 'A', '3');  } -NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset, -         Core::Crypto::KeyManager keys_) -    : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)), keys(std::move(keys_)) { +NCA::NCA(VirtualFile file_, VirtualFile bktr_base_romfs_, u64 bktr_base_ivfc_offset) +    : file(std::move(file_)), bktr_base_romfs(std::move(bktr_base_romfs_)) {      if (file == nullptr) {          status = Loader::ResultStatus::ErrorNullFile;          return; diff --git a/src/core/file_sys/content_archive.h b/src/core/file_sys/content_archive.h index e249079b5..d25cbcf91 100644 --- a/src/core/file_sys/content_archive.h +++ b/src/core/file_sys/content_archive.h @@ -99,8 +99,7 @@ inline bool IsDirectoryLogoPartition(const VirtualDir& pfs) {  class NCA : public ReadOnlyVfsDirectory {  public:      explicit NCA(VirtualFile file, VirtualFile bktr_base_romfs = nullptr, -                 u64 bktr_base_ivfc_offset = 0, -                 Core::Crypto::KeyManager keys = Core::Crypto::KeyManager()); +                 u64 bktr_base_ivfc_offset = 0);      ~NCA() override;      Loader::ResultStatus GetStatus() const; @@ -159,7 +158,7 @@ private:      bool encrypted = false;      bool is_update = false; -    Core::Crypto::KeyManager keys; +    
Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();  };  } // namespace FileSys diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp index ba5f76288..27c1b0233 100644 --- a/src/core/file_sys/registered_cache.cpp +++ b/src/core/file_sys/registered_cache.cpp @@ -408,7 +408,7 @@ void RegisteredCache::ProcessFiles(const std::vector<NcaID>& ids) {          if (file == nullptr)              continue; -        const auto nca = std::make_shared<NCA>(parser(file, id), nullptr, 0, keys); +        const auto nca = std::make_shared<NCA>(parser(file, id), nullptr, 0);          if (nca->GetStatus() != Loader::ResultStatus::Success ||              nca->GetType() != NCAContentType::Meta) {              continue; @@ -486,7 +486,7 @@ std::unique_ptr<NCA> RegisteredCache::GetEntry(u64 title_id, ContentRecordType t      const auto raw = GetEntryRaw(title_id, type);      if (raw == nullptr)          return nullptr; -    return std::make_unique<NCA>(raw, nullptr, 0, keys); +    return std::make_unique<NCA>(raw, nullptr, 0);  }  template <typename T> @@ -865,7 +865,7 @@ std::unique_ptr<NCA> ManualContentProvider::GetEntry(u64 title_id, ContentRecord      const auto res = GetEntryRaw(title_id, type);      if (res == nullptr)          return nullptr; -    return std::make_unique<NCA>(res, nullptr, 0, keys); +    return std::make_unique<NCA>(res, nullptr, 0);  }  std::vector<ContentProviderEntry> ManualContentProvider::ListEntriesFilter( diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h index d1eec240e..f339cd17b 100644 --- a/src/core/file_sys/registered_cache.h +++ b/src/core/file_sys/registered_cache.h @@ -88,7 +88,7 @@ public:  protected:      // A single instance of KeyManager to be used by GetEntry() -    Core::Crypto::KeyManager keys; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();  };  class PlaceholderCache { diff --git 
a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp index ef3084681..175a8266a 100644 --- a/src/core/file_sys/submission_package.cpp +++ b/src/core/file_sys/submission_package.cpp @@ -21,7 +21,7 @@  namespace FileSys {  namespace {  void SetTicketKeys(const std::vector<VirtualFile>& files) { -    Core::Crypto::KeyManager keys; +    auto& keys = Core::Crypto::KeyManager::Instance();      for (const auto& ticket_file : files) {          if (ticket_file == nullptr) { @@ -285,7 +285,7 @@ void NSP::ReadNCAs(const std::vector<VirtualFile>& files) {                      continue;                  } -                auto next_nca = std::make_shared<NCA>(std::move(next_file), nullptr, 0, keys); +                auto next_nca = std::make_shared<NCA>(std::move(next_file), nullptr, 0);                  if (next_nca->GetType() == NCAContentType::Program) {                      program_status[cnmt.GetTitleID()] = next_nca->GetStatus();                  } diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h index ee9b6ce17..cf89de6a9 100644 --- a/src/core/file_sys/submission_package.h +++ b/src/core/file_sys/submission_package.h @@ -73,7 +73,7 @@ private:      std::map<u64, std::map<std::pair<TitleType, ContentRecordType>, std::shared_ptr<NCA>>> ncas;      std::vector<VirtualFile> ticket_files; -    Core::Crypto::KeyManager keys; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();      VirtualFile romfs;      VirtualDir exefs; diff --git a/src/core/file_sys/xts_archive.h b/src/core/file_sys/xts_archive.h index 7704dee90..563531bb6 100644 --- a/src/core/file_sys/xts_archive.h +++ b/src/core/file_sys/xts_archive.h @@ -62,6 +62,6 @@ private:      VirtualFile dec_file; -    Core::Crypto::KeyManager keys; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();  };  } // namespace FileSys diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index 
70c0f8b80..79f22a403 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -35,7 +35,6 @@  #include "common/swap.h"  #include "core/arm/arm_interface.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/gdbstub/gdbstub.h"  #include "core/hle/kernel/memory/page_table.h"  #include "core/hle/kernel/process.h" diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index b04e046ed..456b41e1b 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -42,6 +42,10 @@ struct EmuThreadHandle {          constexpr u32 invalid_handle = 0xFFFFFFFF;          return {invalid_handle, invalid_handle};      } + +    bool IsInvalid() const { +        return (*this) == InvalidHandle(); +    }  };  } // namespace Core diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 8475b698c..4d2a9b35d 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -7,11 +7,15 @@  #include "common/assert.h"  #include "common/common_types.h" +#include "core/arm/exclusive_monitor.h"  #include "core/core.h"  #include "core/hle/kernel/address_arbiter.h"  #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h"  #include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  #include "core/hle/result.h"  #include "core/memory.h" @@ -20,6 +24,7 @@ namespace Kernel {  // Wake up num_to_wake (or all) threads in a vector.  void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,                                   s32 num_to_wake) { +    auto& time_manager = system.Kernel().TimeManager();      // Only process up to 'target' threads, unless 'target' is <= 0, in which case process      // them all.      
std::size_t last = waiting_threads.size(); @@ -29,12 +34,10 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai      // Signal the waiting threads.      for (std::size_t i = 0; i < last; i++) { -        ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb); -        waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); +        waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);          RemoveThread(waiting_threads[i]); -        waiting_threads[i]->SetArbiterWaitAddress(0); +        waiting_threads[i]->WaitForArbitration(false);          waiting_threads[i]->ResumeFromWait(); -        system.PrepareReschedule(waiting_threads[i]->GetProcessorID());      }  } @@ -56,6 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v  }  ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) { +    SchedulerLock lock(system.Kernel());      const std::vector<std::shared_ptr<Thread>> waiting_threads =          GetThreadsWaitingOnAddress(address);      WakeThreads(waiting_threads, num_to_wake); @@ -64,6 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {  ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,                                                                s32 num_to_wake) { +    SchedulerLock lock(system.Kernel());      auto& memory = system.Memory();      // Ensure that we can write to the address. 
@@ -71,16 +76,24 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32          return ERR_INVALID_ADDRESS_STATE;      } -    if (static_cast<s32>(memory.Read32(address)) != value) { -        return ERR_INVALID_STATE; -    } +    const std::size_t current_core = system.CurrentCoreIndex(); +    auto& monitor = system.Monitor(); +    u32 current_value; +    do { +        current_value = monitor.ExclusiveRead32(current_core, address); + +        if (current_value != value) { +            return ERR_INVALID_STATE; +        } +        current_value++; +    } while (!monitor.ExclusiveWrite32(current_core, address, current_value)); -    memory.Write32(address, static_cast<u32>(value + 1));      return SignalToAddressOnly(address, num_to_wake);  }  ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,                                                                           s32 num_to_wake) { +    SchedulerLock lock(system.Kernel());      auto& memory = system.Memory();      // Ensure that we can write to the address. @@ -92,29 +105,33 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a      const std::vector<std::shared_ptr<Thread>> waiting_threads =          GetThreadsWaitingOnAddress(address); -    // Determine the modified value depending on the waiting count. 
+    const std::size_t current_core = system.CurrentCoreIndex(); +    auto& monitor = system.Monitor();      s32 updated_value; -    if (num_to_wake <= 0) { -        if (waiting_threads.empty()) { -            updated_value = value + 1; -        } else { -            updated_value = value - 1; +    do { +        updated_value = monitor.ExclusiveRead32(current_core, address); + +        if (updated_value != value) { +            return ERR_INVALID_STATE;          } -    } else { -        if (waiting_threads.empty()) { -            updated_value = value + 1; -        } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { -            updated_value = value - 1; +        // Determine the modified value depending on the waiting count. +        if (num_to_wake <= 0) { +            if (waiting_threads.empty()) { +                updated_value = value + 1; +            } else { +                updated_value = value - 1; +            }          } else { -            updated_value = value; +            if (waiting_threads.empty()) { +                updated_value = value + 1; +            } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) { +                updated_value = value - 1; +            } else { +                updated_value = value; +            }          } -    } +    } while (!monitor.ExclusiveWrite32(current_core, address, updated_value)); -    if (static_cast<s32>(memory.Read32(address)) != value) { -        return ERR_INVALID_STATE; -    } - -    memory.Write32(address, static_cast<u32>(updated_value));      WakeThreads(waiting_threads, num_to_wake);      return RESULT_SUCCESS;  } @@ -136,60 +153,127 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s  ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,                                                      bool should_decrement) {      auto& memory = system.Memory(); +    auto& kernel = system.Kernel(); +  
  Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); -    // Ensure that we can read the address. -    if (!memory.IsValidVirtualAddress(address)) { -        return ERR_INVALID_ADDRESS_STATE; -    } +    Handle event_handle = InvalidHandle; +    { +        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + +        if (current_thread->IsPendingTermination()) { +            lock.CancelSleep(); +            return ERR_THREAD_TERMINATING; +        } + +        // Ensure that we can read the address. +        if (!memory.IsValidVirtualAddress(address)) { +            lock.CancelSleep(); +            return ERR_INVALID_ADDRESS_STATE; +        } + +        s32 current_value = static_cast<s32>(memory.Read32(address)); +        if (current_value >= value) { +            lock.CancelSleep(); +            return ERR_INVALID_STATE; +        } + +        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + +        s32 decrement_value; + +        const std::size_t current_core = system.CurrentCoreIndex(); +        auto& monitor = system.Monitor(); +        do { +            current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address)); +            if (should_decrement) { +                decrement_value = current_value - 1; +            } else { +                decrement_value = current_value; +            } +        } while ( +            !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value))); + +        // Short-circuit without rescheduling, if timeout is zero. 
+        if (timeout == 0) { +            lock.CancelSleep(); +            return RESULT_TIMEOUT; +        } -    const s32 cur_value = static_cast<s32>(memory.Read32(address)); -    if (cur_value >= value) { -        return ERR_INVALID_STATE; +        current_thread->SetArbiterWaitAddress(address); +        InsertThread(SharedFrom(current_thread)); +        current_thread->SetStatus(ThreadStatus::WaitArb); +        current_thread->WaitForArbitration(true);      } -    if (should_decrement) { -        memory.Write32(address, static_cast<u32>(cur_value - 1)); +    if (event_handle != InvalidHandle) { +        auto& time_manager = kernel.TimeManager(); +        time_manager.UnscheduleTimeEvent(event_handle);      } -    // Short-circuit without rescheduling, if timeout is zero. -    if (timeout == 0) { -        return RESULT_TIMEOUT; +    { +        SchedulerLock lock(kernel); +        if (current_thread->IsWaitingForArbitration()) { +            RemoveThread(SharedFrom(current_thread)); +            current_thread->WaitForArbitration(false); +        }      } -    return WaitForAddressImpl(address, timeout); +    return current_thread->GetSignalingResult();  }  ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {      auto& memory = system.Memory(); +    auto& kernel = system.Kernel(); +    Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); -    // Ensure that we can read the address. -    if (!memory.IsValidVirtualAddress(address)) { -        return ERR_INVALID_ADDRESS_STATE; -    } +    Handle event_handle = InvalidHandle; +    { +        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout); + +        if (current_thread->IsPendingTermination()) { +            lock.CancelSleep(); +            return ERR_THREAD_TERMINATING; +        } + +        // Ensure that we can read the address. 
+        if (!memory.IsValidVirtualAddress(address)) { +            lock.CancelSleep(); +            return ERR_INVALID_ADDRESS_STATE; +        } -    // Only wait for the address if equal. -    if (static_cast<s32>(memory.Read32(address)) != value) { -        return ERR_INVALID_STATE; +        s32 current_value = static_cast<s32>(memory.Read32(address)); +        if (current_value != value) { +            lock.CancelSleep(); +            return ERR_INVALID_STATE; +        } + +        // Short-circuit without rescheduling, if timeout is zero. +        if (timeout == 0) { +            lock.CancelSleep(); +            return RESULT_TIMEOUT; +        } + +        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); +        current_thread->SetArbiterWaitAddress(address); +        InsertThread(SharedFrom(current_thread)); +        current_thread->SetStatus(ThreadStatus::WaitArb); +        current_thread->WaitForArbitration(true);      } -    // Short-circuit without rescheduling if timeout is zero. 
-    if (timeout == 0) { -        return RESULT_TIMEOUT; +    if (event_handle != InvalidHandle) { +        auto& time_manager = kernel.TimeManager(); +        time_manager.UnscheduleTimeEvent(event_handle);      } -    return WaitForAddressImpl(address, timeout); -} +    { +        SchedulerLock lock(kernel); +        if (current_thread->IsWaitingForArbitration()) { +            RemoveThread(SharedFrom(current_thread)); +            current_thread->WaitForArbitration(false); +        } +    } -ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { -    Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); -    current_thread->SetArbiterWaitAddress(address); -    InsertThread(SharedFrom(current_thread)); -    current_thread->SetStatus(ThreadStatus::WaitArb); -    current_thread->InvalidateWakeupCallback(); -    current_thread->WakeAfterDelay(timeout); - -    system.PrepareReschedule(current_thread->GetProcessorID()); -    return RESULT_TIMEOUT; +    return current_thread->GetSignalingResult();  }  void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { @@ -221,9 +305,9 @@ void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {      const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),                                     [&thread](const auto& entry) { return thread == entry; }); -    ASSERT(iter != thread_list.cend()); - -    thread_list.erase(iter); +    if (iter != thread_list.cend()) { +        thread_list.erase(iter); +    }  }  std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index f958eee5a..0b05d533c 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -73,9 +73,6 @@ private:      /// Waits on an address if the value passed is equal to the argument value.      
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout); -    // Waits on the given address with a timeout in nanoseconds -    ResultCode WaitForAddressImpl(VAddr address, s64 timeout); -      /// Wake up num_to_wake (or all) threads in a vector.      void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake); diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index 5498fd313..8aff2227a 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp @@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {      }      // Wake the threads waiting on the ServerPort -    server_port->WakeupAllWaitingThreads(); +    server_port->Signal();      return MakeResult(std::move(client));  } diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index 29bfa3621..d4e5d88cf 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -12,6 +12,7 @@ namespace Kernel {  constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};  constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; +constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};  constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};  constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};  constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103}; diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index ba0eac4c2..9277b5d08 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -14,14 +14,17 @@  #include "common/common_types.h"  #include "common/logging/log.h"  #include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/errors.h"  #include "core/hle/kernel/handle_table.h"  #include "core/hle/kernel/hle_ipc.h"  #include "core/hle/kernel/kernel.h"  #include "core/hle/kernel/object.h"  #include 
"core/hle/kernel/process.h"  #include "core/hle/kernel/readable_event.h" +#include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/server_session.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  #include "core/hle/kernel/writable_event.h"  #include "core/memory.h" @@ -46,15 +49,6 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(      const std::string& reason, u64 timeout, WakeupCallback&& callback,      std::shared_ptr<WritableEvent> writable_event) {      // Put the client thread to sleep until the wait event is signaled or the timeout expires. -    thread->SetWakeupCallback( -        [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread, -                                    std::shared_ptr<SynchronizationObject> object, -                                    std::size_t index) mutable -> bool { -            ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent); -            callback(thread, context, reason); -            context.WriteToOutgoingCommandBuffer(*thread); -            return true; -        });      if (!writable_event) {          // Create event if not provided @@ -62,14 +56,26 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(          writable_event = pair.writable;      } -    const auto readable_event{writable_event->GetReadableEvent()}; -    writable_event->Clear(); -    thread->SetStatus(ThreadStatus::WaitHLEEvent); -    thread->SetSynchronizationObjects({readable_event}); -    readable_event->AddWaitingThread(thread); - -    if (timeout > 0) { -        thread->WakeAfterDelay(timeout); +    { +        Handle event_handle = InvalidHandle; +        SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout); +        thread->SetHLECallback( +            [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool { +                ThreadWakeupReason reason = thread->GetSignalingResult() == 
RESULT_TIMEOUT +                                                ? ThreadWakeupReason::Timeout +                                                : ThreadWakeupReason::Signal; +                callback(thread, context, reason); +                context.WriteToOutgoingCommandBuffer(*thread); +                return true; +            }); +        const auto readable_event{writable_event->GetReadableEvent()}; +        writable_event->Clear(); +        thread->SetHLESyncObject(readable_event.get()); +        thread->SetStatus(ThreadStatus::WaitHLEEvent); +        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); +        readable_event->AddWaitingThread(thread); +        lock.Release(); +        thread->SetHLETimeEvent(event_handle);      }      is_thread_waiting = true; @@ -282,18 +288,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {  }  std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { -    std::vector<u8> buffer; +    std::vector<u8> buffer{};      const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&                             BufferDescriptorA()[buffer_index].Size()};      if (is_buffer_a) { -        ASSERT_MSG(BufferDescriptorA().size() > buffer_index, -                   "BufferDescriptorA invalid buffer_index {}", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return buffer; }, +                              "BufferDescriptorA invalid buffer_index {}", buffer_index);          buffer.resize(BufferDescriptorA()[buffer_index].Size());          memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());      } else { -        ASSERT_MSG(BufferDescriptorX().size() > buffer_index, -                   "BufferDescriptorX invalid buffer_index {}", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return buffer; }, +                              "BufferDescriptorX 
invalid buffer_index {}", buffer_index);          buffer.resize(BufferDescriptorX()[buffer_index].Size());          memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());      } @@ -318,16 +324,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,      }      if (is_buffer_b) { -        ASSERT_MSG(BufferDescriptorB().size() > buffer_index, -                   "BufferDescriptorB invalid buffer_index {}", buffer_index); -        ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, -                   "BufferDescriptorB buffer_index {} is not large enough", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index && +                                  BufferDescriptorB()[buffer_index].Size() >= size, +                              { return 0; }, "BufferDescriptorB is invalid, index={}, size={}", +                              buffer_index, size);          memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);      } else { -        ASSERT_MSG(BufferDescriptorC().size() > buffer_index, -                   "BufferDescriptorC invalid buffer_index {}", buffer_index); -        ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, -                   "BufferDescriptorC buffer_index {} is not large enough", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index && +                                  BufferDescriptorC()[buffer_index].Size() >= size, +                              { return 0; }, "BufferDescriptorC is invalid, index={}, size={}", +                              buffer_index, size);          memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);      } @@ -338,16 +344,12 @@ std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const      const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&                             
BufferDescriptorA()[buffer_index].Size()};      if (is_buffer_a) { -        ASSERT_MSG(BufferDescriptorA().size() > buffer_index, -                   "BufferDescriptorA invalid buffer_index {}", buffer_index); -        ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0, -                   "BufferDescriptorA buffer_index {} is empty", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return 0; }, +                              "BufferDescriptorA invalid buffer_index {}", buffer_index);          return BufferDescriptorA()[buffer_index].Size();      } else { -        ASSERT_MSG(BufferDescriptorX().size() > buffer_index, -                   "BufferDescriptorX invalid buffer_index {}", buffer_index); -        ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0, -                   "BufferDescriptorX buffer_index {} is empty", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return 0; }, +                              "BufferDescriptorX invalid buffer_index {}", buffer_index);          return BufferDescriptorX()[buffer_index].Size();      }  } @@ -356,14 +358,15 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons      const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&                             BufferDescriptorB()[buffer_index].Size()};      if (is_buffer_b) { -        ASSERT_MSG(BufferDescriptorB().size() > buffer_index, -                   "BufferDescriptorB invalid buffer_index {}", buffer_index); +        ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index, { return 0; }, +                              "BufferDescriptorB invalid buffer_index {}", buffer_index);          return BufferDescriptorB()[buffer_index].Size();      } else { -        ASSERT_MSG(BufferDescriptorC().size() > buffer_index, -                   "BufferDescriptorC invalid buffer_index {}", buffer_index); +        
ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index, { return 0; }, +                              "BufferDescriptorC invalid buffer_index {}", buffer_index);          return BufferDescriptorC()[buffer_index].Size();      } +    return 0;  }  std::string HLERequestContext::Description() const { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7655382fa..1f2af7a1b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -2,6 +2,7 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <array>  #include <atomic>  #include <bitset>  #include <functional> @@ -13,11 +14,15 @@  #include "common/assert.h"  #include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/thread.h"  #include "core/arm/arm_interface.h" +#include "core/arm/cpu_interrupt_handler.h"  #include "core/arm/exclusive_monitor.h"  #include "core/core.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h" +#include "core/cpu_manager.h"  #include "core/device_memory.h"  #include "core/hardware_properties.h"  #include "core/hle/kernel/client_port.h" @@ -39,85 +44,28 @@  #include "core/hle/result.h"  #include "core/memory.h" -namespace Kernel { - -/** - * Callback that will wake up the thread it was scheduled for - * @param thread_handle The handle of the thread that's been awoken - * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time - */ -static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) { -    const auto proper_handle = static_cast<Handle>(thread_handle); -    const auto& system = Core::System::GetInstance(); - -    // Lock the global kernel mutex when we enter the kernel HLE. 
-    std::lock_guard lock{HLE::g_hle_lock}; - -    std::shared_ptr<Thread> thread = -        system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); -    if (thread == nullptr) { -        LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); -        return; -    } - -    bool resume = true; - -    if (thread->GetStatus() == ThreadStatus::WaitSynch || -        thread->GetStatus() == ThreadStatus::WaitHLEEvent) { -        // Remove the thread from each of its waiting objects' waitlists -        for (const auto& object : thread->GetSynchronizationObjects()) { -            object->RemoveWaitingThread(thread); -        } -        thread->ClearSynchronizationObjects(); - -        // Invoke the wakeup callback before clearing the wait objects -        if (thread->HasWakeupCallback()) { -            resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0); -        } -    } else if (thread->GetStatus() == ThreadStatus::WaitMutex || -               thread->GetStatus() == ThreadStatus::WaitCondVar) { -        thread->SetMutexWaitAddress(0); -        thread->SetWaitHandle(0); -        if (thread->GetStatus() == ThreadStatus::WaitCondVar) { -            thread->GetOwnerProcess()->RemoveConditionVariableThread(thread); -            thread->SetCondVarWaitAddress(0); -        } - -        auto* const lock_owner = thread->GetLockOwner(); -        // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance -        // and don't have a lock owner unless SignalProcessWideKey was called first and the thread -        // wasn't awakened due to the mutex already being acquired. 
-        if (lock_owner != nullptr) { -            lock_owner->RemoveMutexWaiter(thread); -        } -    } +MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); -    if (thread->GetStatus() == ThreadStatus::WaitArb) { -        auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter(); -        address_arbiter.HandleWakeupThread(thread); -    } - -    if (resume) { -        if (thread->GetStatus() == ThreadStatus::WaitCondVar || -            thread->GetStatus() == ThreadStatus::WaitArb) { -            thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); -        } -        thread->ResumeFromWait(); -    } -} +namespace Kernel {  struct KernelCore::Impl {      explicit Impl(Core::System& system, KernelCore& kernel)          : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {} +    void SetMulticore(bool is_multicore) { +        this->is_multicore = is_multicore; +    } +      void Initialize(KernelCore& kernel) {          Shutdown(); +        RegisterHostThread();          InitializePhysicalCores();          InitializeSystemResourceLimit(kernel);          InitializeMemoryLayout(); -        InitializeThreads(); -        InitializePreemption(); +        InitializePreemption(kernel); +        InitializeSchedulers(); +        InitializeSuspendThreads();      }      void Shutdown() { @@ -126,13 +74,26 @@ struct KernelCore::Impl {          next_user_process_id = Process::ProcessIDMin;          next_thread_id = 1; +        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { +            if (suspend_threads[i]) { +                suspend_threads[i].reset(); +            } +        } + +        for (std::size_t i = 0; i < cores.size(); i++) { +            cores[i].Shutdown(); +            schedulers[i].reset(); +        } +        cores.clear(); + +        registered_core_threads.reset(); +          process_list.clear();          current_process = nullptr;          system_resource_limit = 
nullptr;          global_handle_table.Clear(); -        thread_wakeup_event_type = nullptr;          preemption_event = nullptr;          global_scheduler.Shutdown(); @@ -145,13 +106,21 @@ struct KernelCore::Impl {          cores.clear();          exclusive_monitor.reset(); +        host_thread_ids.clear();      }      void InitializePhysicalCores() {          exclusive_monitor =              Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);          for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { -            cores.emplace_back(system, i, *exclusive_monitor); +            schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i); +            cores.emplace_back(system, i, *schedulers[i], interrupts[i]); +        } +    } + +    void InitializeSchedulers() { +        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { +            cores[i].Scheduler().Initialize();          }      } @@ -173,15 +142,13 @@ struct KernelCore::Impl {          }      } -    void InitializeThreads() { -        thread_wakeup_event_type = -            Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback); -    } - -    void InitializePreemption() { -        preemption_event = -            Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) { -                global_scheduler.PreemptThreads(); +    void InitializePreemption(KernelCore& kernel) { +        preemption_event = Core::Timing::CreateEvent( +            "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) { +                { +                    SchedulerLock lock(kernel); +                    global_scheduler.PreemptThreads(); +                }                  s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));                  system.CoreTiming().ScheduleEvent(time_interval, preemption_event);              }); @@ -190,6 +157,20 @@ struct KernelCore::Impl {          
system.CoreTiming().ScheduleEvent(time_interval, preemption_event);      } +    void InitializeSuspendThreads() { +        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { +            std::string name = "Suspend Thread Id:" + std::to_string(i); +            std::function<void(void*)> init_func = +                system.GetCpuManager().GetSuspendThreadStartFunc(); +            void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); +            ThreadType type = +                static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND); +            auto thread_res = Thread::Create(system, type, name, 0, 0, 0, static_cast<u32>(i), 0, +                                             nullptr, std::move(init_func), init_func_parameter); +            suspend_threads[i] = std::move(thread_res).Unwrap(); +        } +    } +      void MakeCurrentProcess(Process* process) {          current_process = process; @@ -197,15 +178,17 @@ struct KernelCore::Impl {              return;          } -        for (auto& core : cores) { -            core.SetIs64Bit(process->Is64BitProcess()); +        u32 core_id = GetCurrentHostThreadID(); +        if (core_id < Core::Hardware::NUM_CPU_CORES) { +            system.Memory().SetCurrentPageTable(*process, core_id);          } - -        system.Memory().SetCurrentPageTable(*process);      }      void RegisterCoreThread(std::size_t core_id) {          std::unique_lock lock{register_thread_mutex}; +        if (!is_multicore) { +            single_core_thread_id = std::this_thread::get_id(); +        }          const std::thread::id this_id = std::this_thread::get_id();          const auto it = host_thread_ids.find(this_id);          ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); @@ -219,12 +202,19 @@ struct KernelCore::Impl {          std::unique_lock lock{register_thread_mutex};          const std::thread::id this_id = std::this_thread::get_id();          const auto it = 
host_thread_ids.find(this_id); -        ASSERT(it == host_thread_ids.end()); +        if (it != host_thread_ids.end()) { +            return; +        }          host_thread_ids[this_id] = registered_thread_ids++;      }      u32 GetCurrentHostThreadID() const {          const std::thread::id this_id = std::this_thread::get_id(); +        if (!is_multicore) { +            if (single_core_thread_id == this_id) { +                return static_cast<u32>(system.GetCpuManager().CurrentCore()); +            } +        }          const auto it = host_thread_ids.find(this_id);          if (it == host_thread_ids.end()) {              return Core::INVALID_HOST_THREAD_ID; @@ -240,7 +230,7 @@ struct KernelCore::Impl {          }          const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();          const Kernel::Thread* current = sched.GetCurrentThread(); -        if (current != nullptr) { +        if (current != nullptr && !current->IsPhantomMode()) {              result.guest_handle = current->GetGlobalHandle();          } else {              result.guest_handle = InvalidHandle; @@ -313,7 +303,6 @@ struct KernelCore::Impl {      std::shared_ptr<ResourceLimit> system_resource_limit; -    std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;      std::shared_ptr<Core::Timing::EventType> preemption_event;      // This is the kernel's handle table or supervisor handle table which @@ -343,6 +332,15 @@ struct KernelCore::Impl {      std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;      std::shared_ptr<Kernel::SharedMemory> time_shared_mem; +    std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; +    std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; +    std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; + +    bool is_multicore{}; +    std::thread::id single_core_thread_id{}; + +    std::array<u64, Core::Hardware::NUM_CPU_CORES> 
svc_ticks{}; +      // System context      Core::System& system;  }; @@ -352,6 +350,10 @@ KernelCore::~KernelCore() {      Shutdown();  } +void KernelCore::SetMulticore(bool is_multicore) { +    impl->SetMulticore(is_multicore); +} +  void KernelCore::Initialize() {      impl->Initialize(*this);  } @@ -397,11 +399,11 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {  }  Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { -    return impl->cores[id].Scheduler(); +    return *impl->schedulers[id];  }  const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { -    return impl->cores[id].Scheduler(); +    return *impl->schedulers[id];  }  Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) { @@ -412,6 +414,39 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {      return impl->cores[id];  } +Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() { +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return impl->cores[core_id]; +} + +const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return impl->cores[core_id]; +} + +Kernel::Scheduler& KernelCore::CurrentScheduler() { +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return *impl->schedulers[core_id]; +} + +const Kernel::Scheduler& KernelCore::CurrentScheduler() const { +    u32 core_id = impl->GetCurrentHostThreadID(); +    ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +    return *impl->schedulers[core_id]; +} + +std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() { +    return impl->interrupts; +} + +const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() +    const { +    return impl->interrupts; +} +  
Kernel::Synchronization& KernelCore::Synchronization() {      return impl->synchronization;  } @@ -437,15 +472,17 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {  }  void KernelCore::InvalidateAllInstructionCaches() { -    for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) { -        PhysicalCore(i).ArmInterface().ClearInstructionCache(); +    auto& threads = GlobalScheduler().GetThreadList(); +    for (auto& thread : threads) { +        if (!thread->IsHLEThread()) { +            auto& arm_interface = thread->ArmInterface(); +            arm_interface.ClearInstructionCache(); +        }      }  }  void KernelCore::PrepareReschedule(std::size_t id) { -    if (id < impl->global_scheduler.CpuCoresCount()) { -        impl->cores[id].Stop(); -    } +    // TODO: Reimplement, this  }  void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) { @@ -481,10 +518,6 @@ u64 KernelCore::CreateNewUserProcessID() {      return impl->next_user_process_id++;  } -const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const { -    return impl->thread_wakeup_event_type; -} -  Kernel::HandleTable& KernelCore::GlobalHandleTable() {      return impl->global_handle_table;  } @@ -557,4 +590,34 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {      return *impl->time_shared_mem;  } +void KernelCore::Suspend(bool in_suspention) { +    const bool should_suspend = exception_exited || in_suspention; +    { +        SchedulerLock lock(*this); +        ThreadStatus status = should_suspend ? 
ThreadStatus::Ready : ThreadStatus::WaitSleep; +        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { +            impl->suspend_threads[i]->SetStatus(status); +        } +    } +} + +bool KernelCore::IsMulticore() const { +    return impl->is_multicore; +} + +void KernelCore::ExceptionalExit() { +    exception_exited = true; +    Suspend(true); +} + +void KernelCore::EnterSVCProfile() { +    std::size_t core = impl->GetCurrentHostThreadID(); +    impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC)); +} + +void KernelCore::ExitSVCProfile() { +    std::size_t core = impl->GetCurrentHostThreadID(); +    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); +} +  } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 83de1f542..49bd47e89 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -4,15 +4,17 @@  #pragma once +#include <array>  #include <memory>  #include <string>  #include <unordered_map>  #include <vector> +#include "core/hardware_properties.h"  #include "core/hle/kernel/memory/memory_types.h"  #include "core/hle/kernel/object.h"  namespace Core { -struct EmuThreadHandle; +class CPUInterruptHandler;  class ExclusiveMonitor;  class System;  } // namespace Core @@ -65,6 +67,9 @@ public:      KernelCore(KernelCore&&) = delete;      KernelCore& operator=(KernelCore&&) = delete; +    /// Sets if emulation is multicore or single core, must be set before Initialize +    void SetMulticore(bool is_multicore); +      /// Resets the kernel to a clean slate for use.      void Initialize(); @@ -110,6 +115,18 @@ public:      /// Gets the an instance of the respective physical CPU core.      const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const; +    /// Gets the sole instance of the Scheduler at the current running core. 
+    Kernel::Scheduler& CurrentScheduler(); + +    /// Gets the sole instance of the Scheduler at the current running core. +    const Kernel::Scheduler& CurrentScheduler() const; + +    /// Gets the an instance of the current physical CPU core. +    Kernel::PhysicalCore& CurrentPhysicalCore(); + +    /// Gets the an instance of the current physical CPU core. +    const Kernel::PhysicalCore& CurrentPhysicalCore() const; +      /// Gets the an instance of the Synchronization Interface.      Kernel::Synchronization& Synchronization(); @@ -129,6 +146,10 @@ public:      const Core::ExclusiveMonitor& GetExclusiveMonitor() const; +    std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts(); + +    const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts() const; +      void InvalidateAllInstructionCaches();      /// Adds a port to the named port table @@ -191,6 +212,18 @@ public:      /// Gets the shared memory object for Time services.      const Kernel::SharedMemory& GetTimeSharedMem() const; +    /// Suspend/unsuspend the OS. +    void Suspend(bool in_suspention); + +    /// Exceptional exit the OS. +    void ExceptionalExit(); + +    bool IsMulticore() const; + +    void EnterSVCProfile(); + +    void ExitSVCProfile(); +  private:      friend class Object;      friend class Process; @@ -208,9 +241,6 @@ private:      /// Creates a new thread ID, incrementing the internal thread ID counter.      u64 CreateNewThreadID(); -    /// Retrieves the event type used for thread wakeup callbacks. -    const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const; -      /// Provides a reference to the global handle table.      
Kernel::HandleTable& GlobalHandleTable(); @@ -219,6 +249,7 @@ private:      struct Impl;      std::unique_ptr<Impl> impl; +    bool exception_exited{};  };  } // namespace Kernel diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp index 616148190..acf13585c 100644 --- a/src/core/hle/kernel/memory/memory_manager.cpp +++ b/src/core/hle/kernel/memory/memory_manager.cpp @@ -139,7 +139,6 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa      }      // Only succeed if we allocated as many pages as we wanted -    ASSERT(num_pages >= 0);      if (num_pages) {          return ERR_OUT_OF_MEMORY;      } diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 7869eb32b..8f6c944d1 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -34,8 +34,6 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr          if (thread->GetMutexWaitAddress() != mutex_addr)              continue; -        ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); -          ++num_waiters;          if (highest_priority_thread == nullptr ||              thread->GetPriority() < highest_priority_thread->GetPriority()) { @@ -49,6 +47,7 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr  /// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.  
static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,                                     std::shared_ptr<Thread> new_owner) { +    current_thread->RemoveMutexWaiter(new_owner);      const auto threads = current_thread->GetMutexWaitingThreads();      for (const auto& thread : threads) {          if (thread->GetMutexWaitAddress() != mutex_addr) @@ -72,85 +71,100 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,          return ERR_INVALID_ADDRESS;      } -    const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); +    auto& kernel = system.Kernel();      std::shared_ptr<Thread> current_thread = -        SharedFrom(system.CurrentScheduler().GetCurrentThread()); -    std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); -    std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle); +        SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); +    { +        SchedulerLock lock(kernel); +        // The mutex address must be 4-byte aligned +        if ((address % sizeof(u32)) != 0) { +            return ERR_INVALID_ADDRESS; +        } -    // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another -    // thread. -    ASSERT(requesting_thread == current_thread); +        const auto& handle_table = kernel.CurrentProcess()->GetHandleTable(); +        std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle); +        std::shared_ptr<Thread> requesting_thread = +            handle_table.Get<Thread>(requesting_thread_handle); -    const u32 addr_value = system.Memory().Read32(address); +        // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of +        // another thread. +        ASSERT(requesting_thread == current_thread); -    // If the mutex isn't being held, just return success. 
-    if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { -        return RESULT_SUCCESS; -    } +        current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); -    if (holding_thread == nullptr) { -        LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}", -                  holding_thread_handle); -        return ERR_INVALID_HANDLE; -    } +        const u32 addr_value = system.Memory().Read32(address); + +        // If the mutex isn't being held, just return success. +        if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) { +            return RESULT_SUCCESS; +        } -    // Wait until the mutex is released -    current_thread->SetMutexWaitAddress(address); -    current_thread->SetWaitHandle(requesting_thread_handle); +        if (holding_thread == nullptr) { +            return ERR_INVALID_HANDLE; +        } -    current_thread->SetStatus(ThreadStatus::WaitMutex); -    current_thread->InvalidateWakeupCallback(); +        // Wait until the mutex is released +        current_thread->SetMutexWaitAddress(address); +        current_thread->SetWaitHandle(requesting_thread_handle); -    // Update the lock holder thread's priority to prevent priority inversion. -    holding_thread->AddMutexWaiter(current_thread); +        current_thread->SetStatus(ThreadStatus::WaitMutex); -    system.PrepareReschedule(); +        // Update the lock holder thread's priority to prevent priority inversion. 
+        holding_thread->AddMutexWaiter(current_thread); +    } -    return RESULT_SUCCESS; +    { +        SchedulerLock lock(kernel); +        auto* owner = current_thread->GetLockOwner(); +        if (owner != nullptr) { +            owner->RemoveMutexWaiter(current_thread); +        } +    } +    return current_thread->GetSignalingResult();  } -ResultCode Mutex::Release(VAddr address) { +std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner, +                                                             VAddr address) {      // The mutex address must be 4-byte aligned      if ((address % sizeof(u32)) != 0) {          LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address); -        return ERR_INVALID_ADDRESS; +        return {ERR_INVALID_ADDRESS, nullptr};      } -    std::shared_ptr<Thread> current_thread = -        SharedFrom(system.CurrentScheduler().GetCurrentThread()); -    auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address); - -    // There are no more threads waiting for the mutex, release it completely. -    if (thread == nullptr) { +    auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address); +    if (new_owner == nullptr) {          system.Memory().Write32(address, 0); -        return RESULT_SUCCESS; +        return {RESULT_SUCCESS, nullptr};      } -      // Transfer the ownership of the mutex from the previous owner to the new one. 
-    TransferMutexOwnership(address, current_thread, thread); - -    u32 mutex_value = thread->GetWaitHandle(); - +    TransferMutexOwnership(address, owner, new_owner); +    u32 mutex_value = new_owner->GetWaitHandle();      if (num_waiters >= 2) {          // Notify the guest that there are still some threads waiting for the mutex          mutex_value |= Mutex::MutexHasWaitersFlag;      } +    new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS); +    new_owner->SetLockOwner(nullptr); +    new_owner->ResumeFromWait(); -    // Grant the mutex to the next waiting thread and resume it.      system.Memory().Write32(address, mutex_value); +    return {RESULT_SUCCESS, new_owner}; +} -    ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex); -    thread->ResumeFromWait(); +ResultCode Mutex::Release(VAddr address) { +    auto& kernel = system.Kernel(); +    SchedulerLock lock(kernel); -    thread->SetLockOwner(nullptr); -    thread->SetCondVarWaitAddress(0); -    thread->SetMutexWaitAddress(0); -    thread->SetWaitHandle(0); -    thread->SetWaitSynchronizationResult(RESULT_SUCCESS); +    std::shared_ptr<Thread> current_thread = +        SharedFrom(kernel.CurrentScheduler().GetCurrentThread()); -    system.PrepareReschedule(); +    auto [result, new_owner] = Unlock(current_thread, address); -    return RESULT_SUCCESS; +    if (result != RESULT_SUCCESS && new_owner != nullptr) { +        new_owner->SetSynchronizationResults(nullptr, result); +    } + +    return result;  } +  } // namespace Kernel diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h index b904de2e8..3b81dc3df 100644 --- a/src/core/hle/kernel/mutex.h +++ b/src/core/hle/kernel/mutex.h @@ -28,6 +28,10 @@ public:      ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,                            Handle requesting_thread_handle); +    /// Unlocks a mutex for owner at address +    std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner, +    
                                                      VAddr address); +      /// Releases the mutex at the specified address.      ResultCode Release(VAddr address); diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp index a15011076..c6bbdb080 100644 --- a/src/core/hle/kernel/physical_core.cpp +++ b/src/core/hle/kernel/physical_core.cpp @@ -2,12 +2,15 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include "common/assert.h"  #include "common/logging/log.h" +#include "common/spin_lock.h"  #include "core/arm/arm_interface.h"  #ifdef ARCHITECTURE_x86_64  #include "core/arm/dynarmic/arm_dynarmic_32.h"  #include "core/arm/dynarmic/arm_dynarmic_64.h"  #endif +#include "core/arm/cpu_interrupt_handler.h"  #include "core/arm/exclusive_monitor.h"  #include "core/arm/unicorn/arm_unicorn.h"  #include "core/core.h" @@ -17,50 +20,37 @@  namespace Kernel { -PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, -                           Core::ExclusiveMonitor& exclusive_monitor) -    : core_index{id} { -#ifdef ARCHITECTURE_x86_64 -    arm_interface_32 = -        std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index); -    arm_interface_64 = -        std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index); - -#else -    using Core::ARM_Unicorn; -    arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32); -    arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64); -    LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); -#endif +PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler, +                           Core::CPUInterruptHandler& interrupt_handler) +    : interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} { -    scheduler = std::make_unique<Kernel::Scheduler>(system, core_index); +    guard = 
std::make_unique<Common::SpinLock>();  }  PhysicalCore::~PhysicalCore() = default; -void PhysicalCore::Run() { -    arm_interface->Run(); -    arm_interface->ClearExclusiveState(); +void PhysicalCore::Idle() { +    interrupt_handler.AwaitInterrupt();  } -void PhysicalCore::Step() { -    arm_interface->Step(); +void PhysicalCore::Shutdown() { +    scheduler.Shutdown();  } -void PhysicalCore::Stop() { -    arm_interface->PrepareReschedule(); +bool PhysicalCore::IsInterrupted() const { +    return interrupt_handler.IsInterrupted();  } -void PhysicalCore::Shutdown() { -    scheduler->Shutdown(); +void PhysicalCore::Interrupt() { +    guard->lock(); +    interrupt_handler.SetInterrupt(true); +    guard->unlock();  } -void PhysicalCore::SetIs64Bit(bool is_64_bit) { -    if (is_64_bit) { -        arm_interface = arm_interface_64.get(); -    } else { -        arm_interface = arm_interface_32.get(); -    } +void PhysicalCore::ClearInterrupt() { +    guard->lock(); +    interrupt_handler.SetInterrupt(false); +    guard->unlock();  }  } // namespace Kernel diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h index 3269166be..d7a7a951c 100644 --- a/src/core/hle/kernel/physical_core.h +++ b/src/core/hle/kernel/physical_core.h @@ -7,12 +7,17 @@  #include <cstddef>  #include <memory> +namespace Common { +class SpinLock; +} +  namespace Kernel {  class Scheduler;  } // namespace Kernel  namespace Core {  class ARM_Interface; +class CPUInterruptHandler;  class ExclusiveMonitor;  class System;  } // namespace Core @@ -21,7 +26,8 @@ namespace Kernel {  class PhysicalCore {  public: -    PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor); +    PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler, +                 Core::CPUInterruptHandler& interrupt_handler);      ~PhysicalCore();      PhysicalCore(const PhysicalCore&) = delete; @@ -30,23 +36,18 @@ public:      
PhysicalCore(PhysicalCore&&) = default;      PhysicalCore& operator=(PhysicalCore&&) = default; -    /// Execute current jit state -    void Run(); -    /// Execute a single instruction in current jit. -    void Step(); -    /// Stop JIT execution/exit -    void Stop(); +    void Idle(); +    /// Interrupt this physical core. +    void Interrupt(); -    // Shutdown this physical core. -    void Shutdown(); +    /// Clear this core's interrupt +    void ClearInterrupt(); -    Core::ARM_Interface& ArmInterface() { -        return *arm_interface; -    } +    /// Check if this core is interrupted +    bool IsInterrupted() const; -    const Core::ARM_Interface& ArmInterface() const { -        return *arm_interface; -    } +    // Shutdown this physical core. +    void Shutdown();      bool IsMainCore() const {          return core_index == 0; @@ -61,21 +62,18 @@ public:      }      Kernel::Scheduler& Scheduler() { -        return *scheduler; +        return scheduler;      }      const Kernel::Scheduler& Scheduler() const { -        return *scheduler; +        return scheduler;      } -    void SetIs64Bit(bool is_64_bit); -  private: +    Core::CPUInterruptHandler& interrupt_handler;      std::size_t core_index; -    std::unique_ptr<Core::ARM_Interface> arm_interface_32; -    std::unique_ptr<Core::ARM_Interface> arm_interface_64; -    std::unique_ptr<Kernel::Scheduler> scheduler; -    Core::ARM_Interface* arm_interface{}; +    Kernel::Scheduler& scheduler; +    std::unique_ptr<Common::SpinLock> guard;  };  } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index c4c5199b1..f9d7c024d 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -22,6 +22,7 @@  #include "core/hle/kernel/resource_limit.h"  #include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/lock.h"  #include "core/memory.h"  #include "core/settings.h" @@ -30,14 +31,15 @@ namespace {  /**   
* Sets up the primary application thread   * + * @param system The system instance to create the main thread under.   * @param owner_process The parent process for the main thread - * @param kernel The kernel instance to create the main thread under.   * @param priority The priority to give the main thread   */ -void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) { +void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {      const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); -    auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, -                                     owner_process.GetIdealCore(), stack_top, owner_process); +    ThreadType type = THREADTYPE_USER; +    auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0, +                                     owner_process.GetIdealCore(), stack_top, &owner_process);      std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap(); @@ -48,8 +50,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V      thread->GetContext32().cpu_registers[1] = thread_handle;      thread->GetContext64().cpu_registers[1] = thread_handle; +    auto& kernel = system.Kernel();      // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires -    thread->ResumeFromWait(); +    { +        SchedulerLock lock{kernel}; +        thread->SetStatus(ThreadStatus::Ready); +    }  }  } // Anonymous namespace @@ -182,7 +188,6 @@ void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {          }          ++it;      } -    UNREACHABLE();  }  std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads( @@ -207,6 +212,7 @@ void Process::UnregisterThread(const Thread* thread) {  }  ResultCode Process::ClearSignalState() { +    SchedulerLock lock(system.Kernel());      if (status == 
ProcessStatus::Exited) {          LOG_ERROR(Kernel, "called on a terminated process instance.");          return ERR_INVALID_STATE; @@ -294,7 +300,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {      ChangeStatus(ProcessStatus::Running); -    SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top); +    SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);      resource_limit->Reserve(ResourceType::Threads, 1);      resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size);  } @@ -340,6 +346,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {  }  VAddr Process::CreateTLSRegion() { +    SchedulerLock lock(system.Kernel());      if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};          tls_page_iter != tls_pages.cend()) {          return *tls_page_iter->ReserveSlot(); @@ -370,6 +377,7 @@ VAddr Process::CreateTLSRegion() {  }  void Process::FreeTLSRegion(VAddr tls_address) { +    SchedulerLock lock(system.Kernel());      const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);      auto iter =          std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) { @@ -384,6 +392,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {  }  void Process::LoadModule(CodeSet code_set, VAddr base_addr) { +    std::lock_guard lock{HLE::g_hle_lock};      const auto ReprotectSegment = [&](const CodeSet::Segment& segment,                                        Memory::MemoryPermission permission) {          page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission); diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index ef5e19e63..6e286419e 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -6,8 +6,10 @@  #include "common/assert.h"  #include "common/logging/log.h"  #include 
"core/hle/kernel/errors.h" +#include "core/hle/kernel/kernel.h"  #include "core/hle/kernel/object.h"  #include "core/hle/kernel/readable_event.h" +#include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/thread.h"  namespace Kernel { @@ -37,6 +39,7 @@ void ReadableEvent::Clear() {  }  ResultCode ReadableEvent::Reset() { +    SchedulerLock lock(kernel);      if (!is_signaled) {          LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",                    GetObjectId(), GetTypeName(), GetName()); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 1140c72a3..2b12c0dbf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -11,11 +11,15 @@  #include <utility>  #include "common/assert.h" +#include "common/bit_util.h" +#include "common/fiber.h"  #include "common/logging/log.h"  #include "core/arm/arm_interface.h"  #include "core/core.h"  #include "core/core_timing.h" +#include "core/cpu_manager.h"  #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/physical_core.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/time_manager.h" @@ -27,103 +31,151 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}  GlobalScheduler::~GlobalScheduler() = default;  void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) { +    global_list_guard.lock();      thread_list.push_back(std::move(thread)); +    global_list_guard.unlock();  }  void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) { +    global_list_guard.lock();      thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),                        thread_list.end()); +    global_list_guard.unlock();  } -void GlobalScheduler::UnloadThread(std::size_t core) { -    Scheduler& sched = kernel.Scheduler(core); -    sched.UnloadThread(); -} - -void 
GlobalScheduler::SelectThread(std::size_t core) { +u32 GlobalScheduler::SelectThreads() { +    ASSERT(is_locked);      const auto update_thread = [](Thread* thread, Scheduler& sched) { -        if (thread != sched.selected_thread.get()) { +        sched.guard.lock(); +        if (thread != sched.selected_thread_set.get()) {              if (thread == nullptr) {                  ++sched.idle_selection_count;              } -            sched.selected_thread = SharedFrom(thread); +            sched.selected_thread_set = SharedFrom(thread);          } -        sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; +        const bool reschedule_pending = +            sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread); +        sched.is_context_switch_pending = reschedule_pending;          std::atomic_thread_fence(std::memory_order_seq_cst); +        sched.guard.unlock(); +        return reschedule_pending;      }; -    Scheduler& sched = kernel.Scheduler(core); -    Thread* current_thread = nullptr; -    // Step 1: Get top thread in schedule queue. -    current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); -    if (current_thread) { -        update_thread(current_thread, sched); -        return; +    if (!is_reselection_pending.load()) { +        return 0;      } -    // Step 2: Try selecting a suggested thread. -    Thread* winner = nullptr; -    std::set<s32> sug_cores; -    for (auto thread : suggested_queue[core]) { -        s32 this_core = thread->GetProcessorID(); -        Thread* thread_on_core = nullptr; -        if (this_core >= 0) { -            thread_on_core = scheduled_queue[this_core].front(); -        } -        if (this_core < 0 || thread != thread_on_core) { -            winner = thread; -            break; +    std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{}; + +    u32 idle_cores{}; + +    // Step 1: Get top thread in schedule queue. 
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        Thread* top_thread = +            scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); +        if (top_thread != nullptr) { +            // TODO(Blinkhawk): Implement Thread Pinning +        } else { +            idle_cores |= (1ul << core);          } -        sug_cores.insert(this_core); +        top_threads[core] = top_thread;      } -    // if we got a suggested thread, select it, else do a second pass. -    if (winner && winner->GetPriority() > 2) { -        if (winner->IsRunning()) { -            UnloadThread(static_cast<u32>(winner->GetProcessorID())); + +    while (idle_cores != 0) { +        u32 core_id = Common::CountTrailingZeroes32(idle_cores); + +        if (!suggested_queue[core_id].empty()) { +            std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{}; +            std::size_t num_candidates = 0; +            auto iter = suggested_queue[core_id].begin(); +            Thread* suggested = nullptr; +            // Step 2: Try selecting a suggested thread. +            while (iter != suggested_queue[core_id].end()) { +                suggested = *iter; +                iter++; +                s32 suggested_core_id = suggested->GetProcessorID(); +                Thread* top_thread = +                    suggested_core_id >= 0 ? 
top_threads[suggested_core_id] : nullptr; +                if (top_thread != suggested) { +                    if (top_thread != nullptr && +                        top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) { +                        suggested = nullptr; +                        break; +                        // There's a too high thread to do core migration, cancel +                    } +                    TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested); +                    break; +                } +                suggested = nullptr; +                migration_candidates[num_candidates++] = suggested_core_id; +            } +            // Step 3: Select a suggested thread from another core +            if (suggested == nullptr) { +                for (std::size_t i = 0; i < num_candidates; i++) { +                    s32 candidate_core = migration_candidates[i]; +                    suggested = top_threads[candidate_core]; +                    auto it = scheduled_queue[candidate_core].begin(); +                    it++; +                    Thread* next = it != scheduled_queue[candidate_core].end() ? 
*it : nullptr; +                    if (next != nullptr) { +                        TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), +                                       suggested); +                        top_threads[candidate_core] = next; +                        break; +                    } else { +                        suggested = nullptr; +                    } +                } +            } +            top_threads[core_id] = suggested;          } -        TransferToCore(winner->GetPriority(), static_cast<s32>(core), winner); -        update_thread(winner, sched); -        return; + +        idle_cores &= ~(1ul << core_id);      } -    // Step 3: Select a suggested thread from another core -    for (auto& src_core : sug_cores) { -        auto it = scheduled_queue[src_core].begin(); -        it++; -        if (it != scheduled_queue[src_core].end()) { -            Thread* thread_on_core = scheduled_queue[src_core].front(); -            Thread* to_change = *it; -            if (thread_on_core->IsRunning() || to_change->IsRunning()) { -                UnloadThread(static_cast<u32>(src_core)); -            } -            TransferToCore(thread_on_core->GetPriority(), static_cast<s32>(core), thread_on_core); -            current_thread = thread_on_core; -            break; +    u32 cores_needing_context_switch{}; +    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        Scheduler& sched = kernel.Scheduler(core); +        ASSERT(top_threads[core] == nullptr || top_threads[core]->GetProcessorID() == core); +        if (update_thread(top_threads[core], sched)) { +            cores_needing_context_switch |= (1ul << core);          }      } -    update_thread(current_thread, sched); +    return cores_needing_context_switch;  }  bool GlobalScheduler::YieldThread(Thread* yielding_thread) { +    ASSERT(is_locked);      // Note: caller should use critical section, etc. 
+    if (!yielding_thread->IsRunnable()) { +        // Normally this case shouldn't happen except for SetThreadActivity. +        is_reselection_pending.store(true, std::memory_order_release); +        return false; +    }      const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());      const u32 priority = yielding_thread->GetPriority();      // Yield the thread -    const Thread* const winner = scheduled_queue[core_id].front(priority); -    ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front"); -    scheduled_queue[core_id].yield(priority); +    Reschedule(priority, core_id, yielding_thread); +    const Thread* const winner = scheduled_queue[core_id].front(); +    if (kernel.GetCurrentHostThreadID() != core_id) { +        is_reselection_pending.store(true, std::memory_order_release); +    }      return AskForReselectionOrMarkRedundant(yielding_thread, winner);  }  bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { +    ASSERT(is_locked);      // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,      // etc. +    if (!yielding_thread->IsRunnable()) { +        // Normally this case shouldn't happen except for SetThreadActivity. 
+        is_reselection_pending.store(true, std::memory_order_release); +        return false; +    }      const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());      const u32 priority = yielding_thread->GetPriority();      // Yield the thread -    ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), -               "Thread yielding without being in front"); -    scheduled_queue[core_id].yield(priority); +    Reschedule(priority, core_id, yielding_thread);      std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;      for (std::size_t i = 0; i < current_threads.size(); i++) { @@ -153,21 +205,28 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {      if (winner != nullptr) {          if (winner != yielding_thread) { -            if (winner->IsRunning()) { -                UnloadThread(static_cast<u32>(winner->GetProcessorID())); -            }              TransferToCore(winner->GetPriority(), s32(core_id), winner);          }      } else {          winner = next_thread;      } +    if (kernel.GetCurrentHostThreadID() != core_id) { +        is_reselection_pending.store(true, std::memory_order_release); +    } +      return AskForReselectionOrMarkRedundant(yielding_thread, winner);  }  bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { +    ASSERT(is_locked);      // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,      // etc. +    if (!yielding_thread->IsRunnable()) { +        // Normally this case shouldn't happen except for SetThreadActivity. 
+        is_reselection_pending.store(true, std::memory_order_release); +        return false; +    }      Thread* winner = nullptr;      const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); @@ -195,25 +254,31 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread          }          if (winner != nullptr) {              if (winner != yielding_thread) { -                if (winner->IsRunning()) { -                    UnloadThread(static_cast<u32>(winner->GetProcessorID())); -                }                  TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);              }          } else {              winner = yielding_thread;          } +    } else { +        winner = scheduled_queue[core_id].front(); +    } + +    if (kernel.GetCurrentHostThreadID() != core_id) { +        is_reselection_pending.store(true, std::memory_order_release);      }      return AskForReselectionOrMarkRedundant(yielding_thread, winner);  }  void GlobalScheduler::PreemptThreads() { +    ASSERT(is_locked);      for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {          const u32 priority = preemption_priorities[core_id];          if (scheduled_queue[core_id].size(priority) > 0) { -            scheduled_queue[core_id].front(priority)->IncrementYieldCount(); +            if (scheduled_queue[core_id].size(priority) > 1) { +                scheduled_queue[core_id].front(priority)->IncrementYieldCount(); +            }              scheduled_queue[core_id].yield(priority);              if (scheduled_queue[core_id].size(priority) > 1) {                  scheduled_queue[core_id].front(priority)->IncrementYieldCount(); @@ -247,9 +312,6 @@ void GlobalScheduler::PreemptThreads() {          }          if (winner != nullptr) { -            if (winner->IsRunning()) { -                UnloadThread(static_cast<u32>(winner->GetProcessorID())); -            }              
TransferToCore(winner->GetPriority(), s32(core_id), winner);              current_thread =                  winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; @@ -280,9 +342,6 @@ void GlobalScheduler::PreemptThreads() {              }              if (winner != nullptr) { -                if (winner->IsRunning()) { -                    UnloadThread(static_cast<u32>(winner->GetProcessorID())); -                }                  TransferToCore(winner->GetPriority(), s32(core_id), winner);                  current_thread = winner;              } @@ -292,34 +351,65 @@ void GlobalScheduler::PreemptThreads() {      }  } +void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule, +                                                 Core::EmuThreadHandle global_thread) { +    u32 current_core = global_thread.host_handle; +    bool must_context_switch = global_thread.guest_handle != InvalidHandle && +                               (current_core < Core::Hardware::NUM_CPU_CORES); +    while (cores_pending_reschedule != 0) { +        u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule); +        ASSERT(core < Core::Hardware::NUM_CPU_CORES); +        if (!must_context_switch || core != current_core) { +            auto& phys_core = kernel.PhysicalCore(core); +            phys_core.Interrupt(); +        } else { +            must_context_switch = true; +        } +        cores_pending_reschedule &= ~(1ul << core); +    } +    if (must_context_switch) { +        auto& core_scheduler = kernel.CurrentScheduler(); +        kernel.ExitSVCProfile(); +        core_scheduler.TryDoContextSwitch(); +        kernel.EnterSVCProfile(); +    } +} +  void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      suggested_queue[core].add(thread, priority);  }  void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      
suggested_queue[core].remove(thread, priority);  }  void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");      scheduled_queue[core].add(thread, priority);  }  void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");      scheduled_queue[core].add(thread, priority, false);  }  void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      scheduled_queue[core].remove(thread, priority);      scheduled_queue[core].add(thread, priority);  }  void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) { +    ASSERT(is_locked);      scheduled_queue[core].remove(thread, priority);  }  void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { +    ASSERT(is_locked);      const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;      const s32 source_core = thread->GetProcessorID();      if (source_core == destination_core || !schedulable) { @@ -349,6 +439,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,      }  } +void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) { +    if (old_flags == thread->scheduling_state) { +        return; +    } +    ASSERT(is_locked); + +    if (old_flags == static_cast<u32>(ThreadSchedStatus::Runnable)) { +        // In this case the thread was running, now it's pausing/exitting +        if (thread->processor_id >= 0) { +            Unschedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); +        } + +        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +            if (core != static_cast<u32>(thread->processor_id) && +      
          ((thread->affinity_mask >> core) & 1) != 0) { +                Unsuggest(thread->current_priority, core, thread); +            } +        } +    } else if (thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable)) { +        // The thread is now set to running from being stopped +        if (thread->processor_id >= 0) { +            Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); +        } + +        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +            if (core != static_cast<u32>(thread->processor_id) && +                ((thread->affinity_mask >> core) & 1) != 0) { +                Suggest(thread->current_priority, core, thread); +            } +        } +    } + +    SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) { +    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable)) { +        return; +    } +    ASSERT(is_locked); +    if (thread->processor_id >= 0) { +        Unschedule(old_priority, static_cast<u32>(thread->processor_id), thread); +    } + +    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        if (core != static_cast<u32>(thread->processor_id) && +            ((thread->affinity_mask >> core) & 1) != 0) { +            Unsuggest(old_priority, core, thread); +        } +    } + +    if (thread->processor_id >= 0) { +        if (thread == kernel.CurrentScheduler().GetCurrentThread()) { +            SchedulePrepend(thread->current_priority, static_cast<u32>(thread->processor_id), +                            thread); +        } else { +            Schedule(thread->current_priority, static_cast<u32>(thread->processor_id), thread); +        } +    } + +    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        if (core != static_cast<u32>(thread->processor_id) && +            ((thread->affinity_mask >> core) & 1) != 0) { +           
 Suggest(thread->current_priority, core, thread); +        } +    } +    thread->IncrementYieldCount(); +    SetReselectionPending(); +} + +void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, +                                                 s32 old_core) { +    if (thread->scheduling_state != static_cast<u32>(ThreadSchedStatus::Runnable) || +        thread->current_priority >= THREADPRIO_COUNT) { +        return; +    } +    ASSERT(is_locked); + +    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        if (((old_affinity_mask >> core) & 1) != 0) { +            if (core == static_cast<u32>(old_core)) { +                Unschedule(thread->current_priority, core, thread); +            } else { +                Unsuggest(thread->current_priority, core, thread); +            } +        } +    } + +    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { +        if (((thread->affinity_mask >> core) & 1) != 0) { +            if (core == static_cast<u32>(thread->processor_id)) { +                Schedule(thread->current_priority, core, thread); +            } else { +                Suggest(thread->current_priority, core, thread); +            } +        } +    } + +    thread->IncrementYieldCount(); +    SetReselectionPending(); +} +  void GlobalScheduler::Shutdown() {      for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {          scheduled_queue[core].clear(); @@ -359,10 +551,12 @@ void GlobalScheduler::Shutdown() {  void GlobalScheduler::Lock() {      Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); +    ASSERT(!current_thread.IsInvalid());      if (current_thread == current_owner) {          ++scope_lock;      } else {          inner_lock.lock(); +        is_locked = true;          current_owner = current_thread;          ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());          scope_lock = 1; @@ -374,17 +568,18 @@ void 
GlobalScheduler::Unlock() {          ASSERT(scope_lock > 0);          return;      } -    for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { -        SelectThread(i); -    } +    u32 cores_pending_reschedule = SelectThreads(); +    Core::EmuThreadHandle leaving_thread = current_owner;      current_owner = Core::EmuThreadHandle::InvalidHandle();      scope_lock = 1; +    is_locked = false;      inner_lock.unlock(); -    // TODO(Blinkhawk): Setup the interrupts and change context on current core. +    EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);  } -Scheduler::Scheduler(Core::System& system, std::size_t core_id) -    : system{system}, core_id{core_id} {} +Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) { +    switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this); +}  Scheduler::~Scheduler() = default; @@ -393,56 +588,128 @@ bool Scheduler::HaveReadyThreads() const {  }  Thread* Scheduler::GetCurrentThread() const { -    return current_thread.get(); +    if (current_thread) { +        return current_thread.get(); +    } +    return idle_thread.get();  }  Thread* Scheduler::GetSelectedThread() const {      return selected_thread.get();  } -void Scheduler::SelectThreads() { -    system.GlobalScheduler().SelectThread(core_id); -} -  u64 Scheduler::GetLastContextSwitchTicks() const {      return last_context_switch_time;  }  void Scheduler::TryDoContextSwitch() { +    auto& phys_core = system.Kernel().CurrentPhysicalCore(); +    if (phys_core.IsInterrupted()) { +        phys_core.ClearInterrupt(); +    } +    guard.lock();      if (is_context_switch_pending) {          SwitchContext(); +    } else { +        guard.unlock();      }  } -void Scheduler::UnloadThread() { -    Thread* const previous_thread = GetCurrentThread(); -    Process* const previous_process = system.Kernel().CurrentProcess(); +void Scheduler::OnThreadStart() { +    
SwitchContextStep2(); +} -    UpdateLastContextSwitchTime(previous_thread, previous_process); +void Scheduler::Unload() { +    Thread* thread = current_thread.get(); +    if (thread) { +        thread->SetContinuousOnSVC(false); +        thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); +        thread->SetIsRunning(false); +        if (!thread->IsHLEThread() && !thread->HasExited()) { +            Core::ARM_Interface& cpu_core = thread->ArmInterface(); +            cpu_core.SaveContext(thread->GetContext32()); +            cpu_core.SaveContext(thread->GetContext64()); +            // Save the TPIDR_EL0 system register in case it was modified. +            thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); +            cpu_core.ClearExclusiveState(); +        } +        thread->context_guard.unlock(); +    } +} -    // Save context for previous thread -    if (previous_thread) { -        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); -        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); -        // Save the TPIDR_EL0 system register in case it was modified. -        previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); +void Scheduler::Reload() { +    Thread* thread = current_thread.get(); +    if (thread) { +        ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, +                   "Thread must be runnable."); -        if (previous_thread->GetStatus() == ThreadStatus::Running) { -            // This is only the case when a reschedule is triggered without the current thread -            // yielding execution (i.e. 
an event triggered, system core time-sliced, etc) -            previous_thread->SetStatus(ThreadStatus::Ready); +        // Cancel any outstanding wakeup events for this thread +        thread->SetIsRunning(true); +        thread->SetWasRunning(false); +        thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); + +        auto* const thread_owner_process = thread->GetOwnerProcess(); +        if (thread_owner_process != nullptr) { +            system.Kernel().MakeCurrentProcess(thread_owner_process); +        } +        if (!thread->IsHLEThread()) { +            Core::ARM_Interface& cpu_core = thread->ArmInterface(); +            cpu_core.LoadContext(thread->GetContext32()); +            cpu_core.LoadContext(thread->GetContext64()); +            cpu_core.SetTlsAddress(thread->GetTLSAddress()); +            cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0()); +            cpu_core.ChangeProcessorID(this->core_id); +            cpu_core.ClearExclusiveState();          } -        previous_thread->SetIsRunning(false);      } -    current_thread = nullptr; +} + +void Scheduler::SwitchContextStep2() { +    Thread* previous_thread = current_thread_prev.get(); +    Thread* new_thread = selected_thread.get(); + +    // Load context of new thread +    Process* const previous_process = +        previous_thread != nullptr ? 
previous_thread->GetOwnerProcess() : nullptr; + +    if (new_thread) { +        ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable, +                   "Thread must be runnable."); + +        // Cancel any outstanding wakeup events for this thread +        new_thread->SetIsRunning(true); +        new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks(); +        new_thread->SetWasRunning(false); + +        auto* const thread_owner_process = current_thread->GetOwnerProcess(); +        if (thread_owner_process != nullptr) { +            system.Kernel().MakeCurrentProcess(thread_owner_process); +        } +        if (!new_thread->IsHLEThread()) { +            Core::ARM_Interface& cpu_core = new_thread->ArmInterface(); +            cpu_core.LoadContext(new_thread->GetContext32()); +            cpu_core.LoadContext(new_thread->GetContext64()); +            cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); +            cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); +            cpu_core.ChangeProcessorID(this->core_id); +            cpu_core.ClearExclusiveState(); +        } +    } + +    TryDoContextSwitch();  }  void Scheduler::SwitchContext() { -    Thread* const previous_thread = GetCurrentThread(); -    Thread* const new_thread = GetSelectedThread(); +    current_thread_prev = current_thread; +    selected_thread = selected_thread_set; +    Thread* previous_thread = current_thread_prev.get(); +    Thread* new_thread = selected_thread.get(); +    current_thread = selected_thread;      is_context_switch_pending = false; +      if (new_thread == previous_thread) { +        guard.unlock();          return;      } @@ -452,51 +719,75 @@ void Scheduler::SwitchContext() {      // Save context for previous thread      if (previous_thread) { -        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32()); -        system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64()); -        // Save the 
TPIDR_EL0 system register in case it was modified. -        previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0()); - -        if (previous_thread->GetStatus() == ThreadStatus::Running) { -            // This is only the case when a reschedule is triggered without the current thread -            // yielding execution (i.e. an event triggered, system core time-sliced, etc) -            previous_thread->SetStatus(ThreadStatus::Ready); +        if (new_thread != nullptr && new_thread->IsSuspendThread()) { +            previous_thread->SetWasRunning(true);          } +        previous_thread->SetContinuousOnSVC(false); +        previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();          previous_thread->SetIsRunning(false); -    } - -    // Load context of new thread -    if (new_thread) { -        ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id), -                   "Thread must be assigned to this core."); -        ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, -                   "Thread must be ready to become running."); - -        // Cancel any outstanding wakeup events for this thread -        new_thread->CancelWakeupTimer(); -        current_thread = SharedFrom(new_thread); -        new_thread->SetStatus(ThreadStatus::Running); -        new_thread->SetIsRunning(true); - -        auto* const thread_owner_process = current_thread->GetOwnerProcess(); -        if (previous_process != thread_owner_process) { -            system.Kernel().MakeCurrentProcess(thread_owner_process); +        if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) { +            Core::ARM_Interface& cpu_core = previous_thread->ArmInterface(); +            cpu_core.SaveContext(previous_thread->GetContext32()); +            cpu_core.SaveContext(previous_thread->GetContext64()); +            // Save the TPIDR_EL0 system register in case it was modified. 
+            previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); +            cpu_core.ClearExclusiveState();          } +        previous_thread->context_guard.unlock(); +    } -        system.ArmInterface(core_id).LoadContext(new_thread->GetContext32()); -        system.ArmInterface(core_id).LoadContext(new_thread->GetContext64()); -        system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress()); -        system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); +    std::shared_ptr<Common::Fiber>* old_context; +    if (previous_thread != nullptr) { +        old_context = &previous_thread->GetHostContext();      } else { -        current_thread = nullptr; -        // Note: We do not reset the current process and current page table when idling because -        // technically we haven't changed processes, our threads are just paused. +        old_context = &idle_thread->GetHostContext(); +    } +    guard.unlock(); + +    Common::Fiber::YieldTo(*old_context, switch_fiber); +    /// When a thread wakes up, the scheduler may have changed to other in another core. 
+    auto& next_scheduler = system.Kernel().CurrentScheduler(); +    next_scheduler.SwitchContextStep2(); +} + +void Scheduler::OnSwitch(void* this_scheduler) { +    Scheduler* sched = static_cast<Scheduler*>(this_scheduler); +    sched->SwitchToCurrent(); +} + +void Scheduler::SwitchToCurrent() { +    while (true) { +        guard.lock(); +        selected_thread = selected_thread_set; +        current_thread = selected_thread; +        is_context_switch_pending = false; +        guard.unlock(); +        while (!is_context_switch_pending) { +            if (current_thread != nullptr && !current_thread->IsHLEThread()) { +                current_thread->context_guard.lock(); +                if (!current_thread->IsRunnable()) { +                    current_thread->context_guard.unlock(); +                    break; +                } +                if (current_thread->GetProcessorID() != core_id) { +                    current_thread->context_guard.unlock(); +                    break; +                } +            } +            std::shared_ptr<Common::Fiber>* next_context; +            if (current_thread != nullptr) { +                next_context = &current_thread->GetHostContext(); +            } else { +                next_context = &idle_thread->GetHostContext(); +            } +            Common::Fiber::YieldTo(switch_fiber, *next_context); +        }      }  }  void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {      const u64 prev_switch_ticks = last_context_switch_time; -    const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); +    const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();      const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;      if (thread != nullptr) { @@ -510,6 +801,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {      last_context_switch_time = most_recent_switch_ticks;  } +void Scheduler::Initialize() { +    std::string 
name = "Idle Thread Id:" + std::to_string(core_id); +    std::function<void(void*)> init_func = system.GetCpuManager().GetIdleThreadStartFunc(); +    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); +    ThreadType type = static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE); +    auto thread_res = Thread::Create(system, type, name, 0, 64, 0, static_cast<u32>(core_id), 0, +                                     nullptr, std::move(init_func), init_func_parameter); +    idle_thread = std::move(thread_res).Unwrap(); +} +  void Scheduler::Shutdown() {      current_thread = nullptr;      selected_thread = nullptr; @@ -538,4 +839,13 @@ SchedulerLockAndSleep::~SchedulerLockAndSleep() {      time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);  } +void SchedulerLockAndSleep::Release() { +    if (sleep_cancelled) { +        return; +    } +    auto& time_manager = kernel.TimeManager(); +    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); +    sleep_cancelled = true; +} +  } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 07df33f9c..b3b4b5169 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -11,9 +11,14 @@  #include "common/common_types.h"  #include "common/multi_level_queue.h" +#include "common/spin_lock.h"  #include "core/hardware_properties.h"  #include "core/hle/kernel/thread.h" +namespace Common { +class Fiber; +} +  namespace Core {  class ARM_Interface;  class System; @@ -41,41 +46,17 @@ public:          return thread_list;      } -    /** -     * Add a thread to the suggested queue of a cpu core. Suggested threads may be -     * picked if no thread is scheduled to run on the core. -     */ -    void Suggest(u32 priority, std::size_t core, Thread* thread); - -    /** -     * Remove a thread to the suggested queue of a cpu core. 
Suggested threads may be -     * picked if no thread is scheduled to run on the core. -     */ -    void Unsuggest(u32 priority, std::size_t core, Thread* thread); - -    /** -     * Add a thread to the scheduling queue of a cpu core. The thread is added at the -     * back the queue in its priority level. -     */ -    void Schedule(u32 priority, std::size_t core, Thread* thread); - -    /** -     * Add a thread to the scheduling queue of a cpu core. The thread is added at the -     * front the queue in its priority level. -     */ -    void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); +    /// Notify the scheduler a thread's status has changed. +    void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags); -    /// Reschedule an already scheduled thread based on a new priority -    void Reschedule(u32 priority, std::size_t core, Thread* thread); - -    /// Unschedules a thread. -    void Unschedule(u32 priority, std::size_t core, Thread* thread); +    /// Notify the scheduler a thread's priority has changed. +    void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority); -    /// Selects a core and forces it to unload its current thread's context -    void UnloadThread(std::size_t core); +    /// Notify the scheduler a thread's core and/or affinity mask has changed. +    void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);      /** -     * Takes care of selecting the new scheduled thread in three steps: +     * Takes care of selecting the new scheduled threads in three steps:       *       * 1. First a thread is selected from the top of the priority queue. If no thread       *    is obtained then we move to step two, else we are done. @@ -85,8 +66,10 @@ public:       *       * 3. Third is no suggested thread is found, we do a second pass and pick a running       *    thread in another core and swap it with its current thread. +     * +     * returns the cores needing scheduling.       
*/ -    void SelectThread(std::size_t core); +    u32 SelectThreads();      bool HaveReadyThreads(std::size_t core_id) const {          return !scheduled_queue[core_id].empty(); @@ -149,6 +132,40 @@ private:      /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling      /// and reschedules current core if needed.      void Unlock(); + +    void EnableInterruptAndSchedule(u32 cores_pending_reschedule, +                                    Core::EmuThreadHandle global_thread); + +    /** +     * Add a thread to the suggested queue of a cpu core. Suggested threads may be +     * picked if no thread is scheduled to run on the core. +     */ +    void Suggest(u32 priority, std::size_t core, Thread* thread); + +    /** +     * Remove a thread to the suggested queue of a cpu core. Suggested threads may be +     * picked if no thread is scheduled to run on the core. +     */ +    void Unsuggest(u32 priority, std::size_t core, Thread* thread); + +    /** +     * Add a thread to the scheduling queue of a cpu core. The thread is added at the +     * back the queue in its priority level. +     */ +    void Schedule(u32 priority, std::size_t core, Thread* thread); + +    /** +     * Add a thread to the scheduling queue of a cpu core. The thread is added at the +     * front the queue in its priority level. +     */ +    void SchedulePrepend(u32 priority, std::size_t core, Thread* thread); + +    /// Reschedule an already scheduled thread based on a new priority +    void Reschedule(u32 priority, std::size_t core, Thread* thread); + +    /// Unschedules a thread. +    void Unschedule(u32 priority, std::size_t core, Thread* thread); +      /**       * Transfers a thread into an specific core. 
If the destination_core is -1       * it will be unscheduled from its source code and added into its suggested @@ -170,10 +187,13 @@ private:      std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};      /// Scheduler lock mechanisms. -    std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock +    bool is_locked{}; +    Common::SpinLock inner_lock{};      std::atomic<s64> scope_lock{};      Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; +    Common::SpinLock global_list_guard{}; +      /// Lists all thread ids that aren't deleted/etc.      std::vector<std::shared_ptr<Thread>> thread_list;      KernelCore& kernel; @@ -190,11 +210,11 @@ public:      /// Reschedules to the next available thread (call after current thread is suspended)      void TryDoContextSwitch(); -    /// Unloads currently running thread -    void UnloadThread(); - -    /// Select the threads in top of the scheduling multilist. -    void SelectThreads(); +    /// The next two are for SingleCore Only. +    /// Unload current thread before preempting core. +    void Unload(); +    /// Reload current thread after core preemption. +    void Reload();      /// Gets the current running thread      Thread* GetCurrentThread() const; @@ -209,15 +229,30 @@ public:          return is_context_switch_pending;      } +    void Initialize(); +      /// Shutdowns the scheduler.      
void Shutdown(); +    void OnThreadStart(); + +    std::shared_ptr<Common::Fiber>& ControlContext() { +        return switch_fiber; +    } + +    const std::shared_ptr<Common::Fiber>& ControlContext() const { +        return switch_fiber; +    } +  private:      friend class GlobalScheduler;      /// Switches the CPU's active thread context to that of the specified thread      void SwitchContext(); +    /// When a thread wakes up, it must run this through it's new scheduler +    void SwitchContextStep2(); +      /**       * Called on every context switch to update the internal timestamp       * This also updates the running time ticks for the given thread and @@ -231,14 +266,24 @@ private:       */      void UpdateLastContextSwitchTime(Thread* thread, Process* process); +    static void OnSwitch(void* this_scheduler); +    void SwitchToCurrent(); +      std::shared_ptr<Thread> current_thread = nullptr;      std::shared_ptr<Thread> selected_thread = nullptr; +    std::shared_ptr<Thread> current_thread_prev = nullptr; +    std::shared_ptr<Thread> selected_thread_set = nullptr; +    std::shared_ptr<Thread> idle_thread = nullptr; + +    std::shared_ptr<Common::Fiber> switch_fiber = nullptr;      Core::System& system;      u64 last_context_switch_time = 0;      u64 idle_selection_count = 0;      const std::size_t core_id; +    Common::SpinLock guard{}; +      bool is_context_switch_pending = false;  }; @@ -261,6 +306,8 @@ public:          sleep_cancelled = true;      } +    void Release(); +  private:      Handle& event_handle;      Thread* time_task; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 25438b86b..7b23a6889 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -17,6 +17,7 @@  #include "core/hle/kernel/hle_ipc.h"  #include "core/hle/kernel/kernel.h"  #include "core/hle/kernel/process.h" +#include "core/hle/kernel/scheduler.h"  #include 
"core/hle/kernel/server_session.h"  #include "core/hle/kernel/session.h"  #include "core/hle/kernel/thread.h" @@ -168,9 +169,12 @@ ResultCode ServerSession::CompleteSyncRequest() {      }      // Some service requests require the thread to block -    if (!context.IsThreadWaiting()) { -        context.GetThread().ResumeFromWait(); -        context.GetThread().SetWaitSynchronizationResult(result); +    { +        SchedulerLock lock(kernel); +        if (!context.IsThreadWaiting()) { +            context.GetThread().ResumeFromWait(); +            context.GetThread().SetSynchronizationResults(nullptr, result); +        }      }      request_queue.Pop(); @@ -180,8 +184,10 @@ ResultCode ServerSession::CompleteSyncRequest() {  ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,                                              Core::Memory::Memory& memory) { -    Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {}); -    return QueueSyncRequest(std::move(thread), memory); +    ResultCode result = QueueSyncRequest(std::move(thread), memory); +    const u64 delay = kernel.IsMulticore() ? 
0U : 20000U; +    Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {}); +    return result;  }  } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 4ae4529f5..5db19dcf3 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -10,14 +10,15 @@  #include "common/alignment.h"  #include "common/assert.h" +#include "common/fiber.h"  #include "common/logging/log.h"  #include "common/microprofile.h"  #include "common/string_util.h"  #include "core/arm/exclusive_monitor.h"  #include "core/core.h" -#include "core/core_manager.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h" +#include "core/cpu_manager.h"  #include "core/hle/kernel/address_arbiter.h"  #include "core/hle/kernel/client_port.h"  #include "core/hle/kernel/client_session.h" @@ -27,6 +28,7 @@  #include "core/hle/kernel/memory/memory_block.h"  #include "core/hle/kernel/memory/page_table.h"  #include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/physical_core.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/readable_event.h"  #include "core/hle/kernel/resource_limit.h" @@ -37,6 +39,7 @@  #include "core/hle/kernel/svc_wrap.h"  #include "core/hle/kernel/synchronization.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  #include "core/hle/kernel/transfer_memory.h"  #include "core/hle/kernel/writable_event.h"  #include "core/hle/lock.h" @@ -133,6 +136,7 @@ enum class ResourceLimitValueType {  ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,                                            u32 resource_type, ResourceLimitValueType value_type) { +    std::lock_guard lock{HLE::g_hle_lock};      const auto type = static_cast<ResourceType>(resource_type);      if (!IsValidResourceType(type)) {          LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); @@ -160,6 +164,7 @@ ResultVal<s64> 
RetrieveResourceLimitValue(Core::System& system, Handle resource_  /// Set the process heap to a given Size. It can both extend and shrink the heap.  static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);      // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB. @@ -190,6 +195,7 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s  static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,                                       u32 attribute) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_DEBUG(Kernel_SVC,                "called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,                size, mask, attribute); @@ -226,8 +232,15 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si                                           static_cast<Memory::MemoryAttribute>(attribute));  } +static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask, +                                       u32 attribute) { +    return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<std::size_t>(size), +                              mask, attribute); +} +  /// Maps a memory range into a different range.  
static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,                src_addr, size); @@ -241,8 +254,14 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr      return page_table.Map(dst_addr, src_addr, size);  } +static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { +    return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), +                     static_cast<std::size_t>(size)); +} +  /// Unmaps a region that was previously mapped with svcMapMemory  static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,                src_addr, size); @@ -256,9 +275,15 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad      return page_table.Unmap(dst_addr, src_addr, size);  } +static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { +    return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr), +                       static_cast<std::size_t>(size)); +} +  /// Connect to an OS service given the port name, returns the handle to the port to out  static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,                                       VAddr port_name_address) { +    std::lock_guard lock{HLE::g_hle_lock};      auto& memory = system.Memory();      if (!memory.IsValidVirtualAddress(port_name_address)) { @@ -317,11 +342,30 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {      LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());      auto thread = 
system.CurrentScheduler().GetCurrentThread(); -    thread->InvalidateWakeupCallback(); -    thread->SetStatus(ThreadStatus::WaitIPC); -    system.PrepareReschedule(thread->GetProcessorID()); +    { +        SchedulerLock lock(system.Kernel()); +        thread->InvalidateHLECallback(); +        thread->SetStatus(ThreadStatus::WaitIPC); +        session->SendSyncRequest(SharedFrom(thread), system.Memory()); +    } + +    if (thread->HasHLECallback()) { +        Handle event_handle = thread->GetHLETimeEvent(); +        if (event_handle != InvalidHandle) { +            auto& time_manager = system.Kernel().TimeManager(); +            time_manager.UnscheduleTimeEvent(event_handle); +        } + +        { +            SchedulerLock lock(system.Kernel()); +            auto* sync_object = thread->GetHLESyncObject(); +            sync_object->RemoveWaitingThread(SharedFrom(thread)); +        } + +        thread->InvokeHLECallback(SharedFrom(thread)); +    } -    return session->SendSyncRequest(SharedFrom(thread), system.Memory()); +    return thread->GetSignalingResult();  }  static ResultCode SendSyncRequest32(Core::System& system, Handle handle) { @@ -383,6 +427,15 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han      return ERR_INVALID_HANDLE;  } +static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, +                                 Handle handle) { +    u64 process_id{}; +    const auto result = GetProcessId(system, &process_id, handle); +    *process_id_low = static_cast<u32>(process_id); +    *process_id_high = static_cast<u32>(process_id >> 32); +    return result; +} +  /// Wait for the given handles to synchronize, timeout after the specified nanoseconds  static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,                                        u64 handle_count, s64 nano_seconds) { @@ -447,10 +500,13 @@ static ResultCode 
CancelSynchronization(Core::System& system, Handle thread_hand      }      thread->CancelWait(); -    system.PrepareReschedule(thread->GetProcessorID());      return RESULT_SUCCESS;  } +static ResultCode CancelSynchronization32(Core::System& system, Handle thread_handle) { +    return CancelSynchronization(system, thread_handle); +} +  /// Attempts to locks a mutex, creating it if it does not already exist  static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,                                  VAddr mutex_addr, Handle requesting_thread_handle) { @@ -475,6 +531,12 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand                                                    requesting_thread_handle);  } +static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle, +                                  u32 mutex_addr, Handle requesting_thread_handle) { +    return ArbitrateLock(system, holding_thread_handle, static_cast<VAddr>(mutex_addr), +                         requesting_thread_handle); +} +  /// Unlock a mutex  static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {      LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr); @@ -494,6 +556,10 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {      return current_process->GetMutex().Release(mutex_addr);  } +static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) { +    return ArbitrateUnlock(system, static_cast<VAddr>(mutex_addr)); +} +  enum class BreakType : u32 {      Panic = 0,      AssertionFailed = 1, @@ -594,6 +660,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {          info2, has_dumped_buffer ? 
std::make_optional(debug_buffer) : std::nullopt);      if (!break_reason.signal_debugger) { +        SchedulerLock lock(system.Kernel());          LOG_CRITICAL(              Debug_Emulated,              "Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}", @@ -605,14 +672,16 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {          const auto thread_processor_id = current_thread->GetProcessorID();          system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace(); -        system.Kernel().CurrentProcess()->PrepareForTermination(); -          // Kill the current thread +        system.Kernel().ExceptionalExit();          current_thread->Stop(); -        system.PrepareReschedule();      }  } +static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) { +    Break(system, reason, static_cast<u64>(info1), static_cast<u64>(info2)); +} +  /// Used to output a message on a debug hardware unit - does nothing on a retail unit  static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {      if (len == 0) { @@ -627,6 +696,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre  /// Gets system/memory information for the current process  static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,                            u64 info_sub_id) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,                info_sub_id, handle); @@ -863,9 +933,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha          if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {              const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks(); -            out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks); +            out_ticks = thread_ticks + 
(core_timing.GetCPUTicks() - prev_ctx_ticks);          } else if (same_thread && info_sub_id == system.CurrentCoreIndex()) { -            out_ticks = core_timing.GetTicks() - prev_ctx_ticks; +            out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;          }          *result = out_ticks; @@ -892,6 +962,7 @@ static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_h  /// Maps memory at a desired address  static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);      if (!Common::Is4KBAligned(addr)) { @@ -939,8 +1010,13 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)      return page_table.MapPhysicalMemory(addr, size);  } +static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { +    return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); +} +  /// Unmaps memory previously mapped via MapPhysicalMemory  static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);      if (!Common::Is4KBAligned(addr)) { @@ -988,6 +1064,10 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size      return page_table.UnmapPhysicalMemory(addr, size);  } +static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) { +    return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<std::size_t>(size)); +} +  /// Sets the thread activity  static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {      LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); @@ -1017,10 +1097,11 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, 
u32 act          return ERR_BUSY;      } -    thread->SetActivity(static_cast<ThreadActivity>(activity)); +    return thread->SetActivity(static_cast<ThreadActivity>(activity)); +} -    system.PrepareReschedule(thread->GetProcessorID()); -    return RESULT_SUCCESS; +static ResultCode SetThreadActivity32(Core::System& system, Handle handle, u32 activity) { +    return SetThreadActivity(system, handle, activity);  }  /// Gets the thread context @@ -1064,6 +1145,10 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H      return RESULT_SUCCESS;  } +static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) { +    return GetThreadContext(system, static_cast<VAddr>(thread_context), handle); +} +  /// Gets the priority for the specified thread  static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {      LOG_TRACE(Kernel_SVC, "called"); @@ -1071,6 +1156,7 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle      const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();      const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle);      if (!thread) { +        *priority = 0;          LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);          return ERR_INVALID_HANDLE;      } @@ -1105,18 +1191,26 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri      thread->SetPriority(priority); -    system.PrepareReschedule(thread->GetProcessorID());      return RESULT_SUCCESS;  } +static ResultCode SetThreadPriority32(Core::System& system, Handle handle, u32 priority) { +    return SetThreadPriority(system, handle, priority); +} +  /// Get which CPU core is executing the current thread  static u32 GetCurrentProcessorNumber(Core::System& system) {      LOG_TRACE(Kernel_SVC, "called"); -    return system.CurrentScheduler().GetCurrentThread()->GetProcessorID(); + 
   return static_cast<u32>(system.CurrentPhysicalCore().CoreIndex()); +} + +static u32 GetCurrentProcessorNumber32(Core::System& system) { +    return GetCurrentProcessorNumber(system);  }  static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,                                    u64 size, u32 permissions) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC,                "called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",                shared_memory_handle, addr, size, permissions); @@ -1187,9 +1281,16 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han      return shared_memory->Map(*current_process, addr, size, permission_type);  } +static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr, +                                    u32 size, u32 permissions) { +    return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr), +                           static_cast<std::size_t>(size), permissions); +} +  static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,                                       VAddr page_info_address, Handle process_handle,                                       VAddr address) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);      const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();      std::shared_ptr<Process> process = handle_table.Get<Process>(process_handle); @@ -1372,6 +1473,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha  /// Exits the current process  static void ExitProcess(Core::System& system) {      auto* current_process = system.Kernel().CurrentProcess(); +    UNIMPLEMENTED();      LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());      
ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running, @@ -1381,8 +1483,10 @@ static void ExitProcess(Core::System& system) {      // Kill the current thread      system.CurrentScheduler().GetCurrentThread()->Stop(); +} -    system.PrepareReschedule(); +static void ExitProcess32(Core::System& system) { +    ExitProcess(system);  }  /// Creates a new thread @@ -1428,9 +1532,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e      ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1)); +    ThreadType type = THREADTYPE_USER;      CASCADE_RESULT(std::shared_ptr<Thread> thread, -                   Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top, -                                  *current_process)); +                   Thread::Create(system, type, "", entry_point, priority, arg, processor_id, +                                  stack_top, current_process));      const auto new_thread_handle = current_process->GetHandleTable().Create(thread);      if (new_thread_handle.Failed()) { @@ -1444,11 +1549,15 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e      thread->SetName(          fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); -    system.PrepareReschedule(thread->GetProcessorID()); -      return RESULT_SUCCESS;  } +static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority, +                                 u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) { +    return CreateThread(system, out_handle, static_cast<VAddr>(entry_point), static_cast<u64>(arg), +                        static_cast<VAddr>(stack_top), priority, processor_id); +} +  /// Starts the thread for the provided handle  static ResultCode StartThread(Core::System& system, Handle thread_handle) {      LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle); @@ -1463,13 +1572,11 
@@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {      ASSERT(thread->GetStatus() == ThreadStatus::Dormant); -    thread->ResumeFromWait(); - -    if (thread->GetStatus() == ThreadStatus::Ready) { -        system.PrepareReschedule(thread->GetProcessorID()); -    } +    return thread->Start(); +} -    return RESULT_SUCCESS; +static ResultCode StartThread32(Core::System& system, Handle thread_handle) { +    return StartThread(system, thread_handle);  }  /// Called when a thread exits @@ -1477,9 +1584,12 @@ static void ExitThread(Core::System& system) {      LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());      auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); -    current_thread->Stop();      system.GlobalScheduler().RemoveThread(SharedFrom(current_thread)); -    system.PrepareReschedule(); +    current_thread->Stop(); +} + +static void ExitThread32(Core::System& system) { +    ExitThread(system);  }  /// Sleep the current thread @@ -1498,15 +1608,21 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {      if (nanoseconds <= 0) {          switch (static_cast<SleepType>(nanoseconds)) { -        case SleepType::YieldWithoutLoadBalancing: -            is_redundant = current_thread->YieldSimple(); +        case SleepType::YieldWithoutLoadBalancing: { +            auto pair = current_thread->YieldSimple(); +            is_redundant = pair.second;              break; -        case SleepType::YieldWithLoadBalancing: -            is_redundant = current_thread->YieldAndBalanceLoad(); +        } +        case SleepType::YieldWithLoadBalancing: { +            auto pair = current_thread->YieldAndBalanceLoad(); +            is_redundant = pair.second;              break; -        case SleepType::YieldAndWaitForLoadBalancing: -            is_redundant = current_thread->YieldAndWaitForLoadBalancing(); +        } +        case SleepType::YieldAndWaitForLoadBalancing: { +          
  auto pair = current_thread->YieldAndWaitForLoadBalancing(); +            is_redundant = pair.second;              break; +        }          default:              UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);          } @@ -1514,13 +1630,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {          current_thread->Sleep(nanoseconds);      } -    if (is_redundant) { -        // If it's redundant, the core is pretty much idle. Some games keep idling -        // a core while it's doing nothing, we advance timing to avoid costly continuous -        // calls. -        system.CoreTiming().AddTicks(2000); +    if (is_redundant && !system.Kernel().IsMulticore()) { +        system.Kernel().ExitSVCProfile(); +        system.CoreTiming().AddTicks(1000U); +        system.GetCpuManager().PreemptSingleCore(); +        system.Kernel().EnterSVCProfile();      } -    system.PrepareReschedule(current_thread->GetProcessorID()); +} + +static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) { +    const s64 nanoseconds = static_cast<s64>(static_cast<u64>(nanoseconds_low) | +                                             (static_cast<u64>(nanoseconds_high) << 32)); +    SleepThread(system, nanoseconds);  }  /// Wait process wide key atomic @@ -1547,31 +1668,69 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add      }      ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); - +    auto& kernel = system.Kernel(); +    Handle event_handle; +    Thread* current_thread = system.CurrentScheduler().GetCurrentThread();      auto* const current_process = system.Kernel().CurrentProcess(); -    const auto& handle_table = current_process->GetHandleTable(); -    std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); -    ASSERT(thread); +    { +        SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds); +       
 const auto& handle_table = current_process->GetHandleTable(); +        std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle); +        ASSERT(thread); + +        current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); + +        if (thread->IsPendingTermination()) { +            lock.CancelSleep(); +            return ERR_THREAD_TERMINATING; +        } + +        const auto release_result = current_process->GetMutex().Release(mutex_addr); +        if (release_result.IsError()) { +            lock.CancelSleep(); +            return release_result; +        } + +        if (nano_seconds == 0) { +            lock.CancelSleep(); +            return RESULT_TIMEOUT; +        } -    const auto release_result = current_process->GetMutex().Release(mutex_addr); -    if (release_result.IsError()) { -        return release_result; +        current_thread->SetCondVarWaitAddress(condition_variable_addr); +        current_thread->SetMutexWaitAddress(mutex_addr); +        current_thread->SetWaitHandle(thread_handle); +        current_thread->SetStatus(ThreadStatus::WaitCondVar); +        current_process->InsertConditionVariableThread(SharedFrom(current_thread));      } -    Thread* current_thread = system.CurrentScheduler().GetCurrentThread(); -    current_thread->SetCondVarWaitAddress(condition_variable_addr); -    current_thread->SetMutexWaitAddress(mutex_addr); -    current_thread->SetWaitHandle(thread_handle); -    current_thread->SetStatus(ThreadStatus::WaitCondVar); -    current_thread->InvalidateWakeupCallback(); -    current_process->InsertConditionVariableThread(SharedFrom(current_thread)); +    if (event_handle != InvalidHandle) { +        auto& time_manager = kernel.TimeManager(); +        time_manager.UnscheduleTimeEvent(event_handle); +    } + +    { +        SchedulerLock lock(kernel); -    current_thread->WakeAfterDelay(nano_seconds); +        auto* owner = current_thread->GetLockOwner(); +        if (owner != nullptr) { +            
owner->RemoveMutexWaiter(SharedFrom(current_thread)); +        } +        current_process->RemoveConditionVariableThread(SharedFrom(current_thread)); +    }      // Note: Deliberately don't attempt to inherit the lock owner's priority. -    system.PrepareReschedule(current_thread->GetProcessorID()); -    return RESULT_SUCCESS; +    return current_thread->GetSignalingResult(); +} + +static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr, +                                             u32 condition_variable_addr, Handle thread_handle, +                                             u32 nanoseconds_low, u32 nanoseconds_high) { +    const s64 nanoseconds = +        static_cast<s64>(nanoseconds_low | (static_cast<u64>(nanoseconds_high) << 32)); +    return WaitProcessWideKeyAtomic(system, static_cast<VAddr>(mutex_addr), +                                    static_cast<VAddr>(condition_variable_addr), thread_handle, +                                    nanoseconds);  }  /// Signal process wide key @@ -1582,7 +1741,9 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_      ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));      // Retrieve a list of all threads that are waiting for this condition variable. 
-    auto* const current_process = system.Kernel().CurrentProcess(); +    auto& kernel = system.Kernel(); +    SchedulerLock lock(kernel); +    auto* const current_process = kernel.CurrentProcess();      std::vector<std::shared_ptr<Thread>> waiting_threads =          current_process->GetConditionVariableThreads(condition_variable_addr); @@ -1591,7 +1752,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_      std::size_t last = waiting_threads.size();      if (target > 0)          last = std::min(waiting_threads.size(), static_cast<std::size_t>(target)); - +    auto& time_manager = kernel.TimeManager();      for (std::size_t index = 0; index < last; ++index) {          auto& thread = waiting_threads[index]; @@ -1599,7 +1760,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_          // liberate Cond Var Thread.          current_process->RemoveConditionVariableThread(thread); -        thread->SetCondVarWaitAddress(0);          const std::size_t current_core = system.CurrentCoreIndex();          auto& monitor = system.Monitor(); @@ -1610,10 +1770,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_          u32 update_val = 0;          const VAddr mutex_address = thread->GetMutexWaitAddress();          do { -            monitor.SetExclusive(current_core, mutex_address); -              // If the mutex is not yet acquired, acquire it. 
-            mutex_val = memory.Read32(mutex_address); +            mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);              if (mutex_val != 0) {                  update_val = mutex_val | Mutex::MutexHasWaitersFlag; @@ -1621,33 +1779,28 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_                  update_val = thread->GetWaitHandle();              }          } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val)); +        monitor.ClearExclusive();          if (mutex_val == 0) {              // We were able to acquire the mutex, resume this thread. -            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); -            thread->ResumeFromWait(); -              auto* const lock_owner = thread->GetLockOwner();              if (lock_owner != nullptr) {                  lock_owner->RemoveMutexWaiter(thread);              }              thread->SetLockOwner(nullptr); -            thread->SetMutexWaitAddress(0); -            thread->SetWaitHandle(0); -            thread->SetWaitSynchronizationResult(RESULT_SUCCESS); -            system.PrepareReschedule(thread->GetProcessorID()); +            thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS); +            thread->ResumeFromWait();          } else {              // The mutex is already owned by some other thread, make this thread wait on it.              
const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);              const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();              auto owner = handle_table.Get<Thread>(owner_handle);              ASSERT(owner); -            ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); -            thread->InvalidateWakeupCallback(); -            thread->SetStatus(ThreadStatus::WaitMutex); +            if (thread->GetStatus() == ThreadStatus::WaitCondVar) { +                thread->SetStatus(ThreadStatus::WaitMutex); +            }              owner->AddMutexWaiter(thread); -            system.PrepareReschedule(thread->GetProcessorID());          }      }  } @@ -1678,12 +1831,15 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,      auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();      const ResultCode result =          address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); -    if (result == RESULT_SUCCESS) { -        system.PrepareReschedule(); -    }      return result;  } +static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value, +                                   u32 timeout_low, u32 timeout_high) { +    s64 timeout = static_cast<s64>(timeout_low | (static_cast<u64>(timeout_high) << 32)); +    return WaitForAddress(system, static_cast<VAddr>(address), type, value, timeout); +} +  // Signals to an address (via Address Arbiter)  static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,                                    s32 num_to_wake) { @@ -1707,6 +1863,11 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,      return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);  } +static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value, +                                    s32 
num_to_wake) { +    return SignalToAddress(system, static_cast<VAddr>(address), type, value, num_to_wake); +} +  static void KernelDebug([[maybe_unused]] Core::System& system,                          [[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,                          [[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) { @@ -1725,14 +1886,21 @@ static u64 GetSystemTick(Core::System& system) {      auto& core_timing = system.CoreTiming();      // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) -    const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())}; +    const u64 result{system.CoreTiming().GetClockTicks()}; -    // Advance time to defeat dumb games that busy-wait for the frame to end. -    core_timing.AddTicks(400); +    if (!system.Kernel().IsMulticore()) { +        core_timing.AddTicks(400U); +    }      return result;  } +static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) { +    u64 time = GetSystemTick(system); +    *time_low = static_cast<u32>(time); +    *time_high = static_cast<u32>(time >> 32); +} +  /// Close a handle  static ResultCode CloseHandle(Core::System& system, Handle handle) {      LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle); @@ -1765,9 +1933,14 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {      return ERR_INVALID_HANDLE;  } +static ResultCode ResetSignal32(Core::System& system, Handle handle) { +    return ResetSignal(system, handle); +} +  /// Creates a TransferMemory object  static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,                                         u32 permissions) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,                permissions); @@ -1812,6 +1985,12 @@ static ResultCode CreateTransferMemory(Core::System& system, 
Handle* handle, VAd      return RESULT_SUCCESS;  } +static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size, +                                         u32 permissions) { +    return CreateTransferMemory(system, handle, static_cast<VAddr>(addr), +                                static_cast<std::size_t>(size), permissions); +} +  static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,                                      u64* mask) {      LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle); @@ -1821,6 +2000,8 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,      if (!thread) {          LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",                    thread_handle); +        *core = 0; +        *mask = 0;          return ERR_INVALID_HANDLE;      } @@ -1830,6 +2011,15 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,      return RESULT_SUCCESS;  } +static ResultCode GetThreadCoreMask32(Core::System& system, Handle thread_handle, u32* core, +                                      u32* mask_low, u32* mask_high) { +    u64 mask{}; +    const auto result = GetThreadCoreMask(system, thread_handle, core, &mask); +    *mask_high = static_cast<u32>(mask >> 32); +    *mask_low = static_cast<u32>(mask); +    return result; +} +  static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,                                      u64 affinity_mask) {      LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}", @@ -1861,7 +2051,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,              return ERR_INVALID_COMBINATION;          } -        if (core < Core::NUM_CPU_CORES) { +        if (core < Core::Hardware::NUM_CPU_CORES) {              if ((affinity_mask & (1ULL << core)) == 0) {                  
LOG_ERROR(Kernel_SVC,                            "Core is not enabled for the current mask, core={}, mask={:016X}", core, @@ -1883,11 +2073,14 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,          return ERR_INVALID_HANDLE;      } -    system.PrepareReschedule(thread->GetProcessorID()); -    thread->ChangeCore(core, affinity_mask); -    system.PrepareReschedule(thread->GetProcessorID()); +    return thread->SetCoreAndAffinityMask(core, affinity_mask); +} -    return RESULT_SUCCESS; +static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core, +                                      u32 affinity_mask_low, u32 affinity_mask_high) { +    const u64 affinity_mask = +        static_cast<u64>(affinity_mask_low) | (static_cast<u64>(affinity_mask_high) << 32); +    return SetThreadCoreMask(system, thread_handle, core, affinity_mask);  }  static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) { @@ -1918,6 +2111,10 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle      return RESULT_SUCCESS;  } +static ResultCode CreateEvent32(Core::System& system, Handle* write_handle, Handle* read_handle) { +    return CreateEvent(system, write_handle, read_handle); +} +  static ResultCode ClearEvent(Core::System& system, Handle handle) {      LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle); @@ -1939,6 +2136,10 @@ static ResultCode ClearEvent(Core::System& system, Handle handle) {      return ERR_INVALID_HANDLE;  } +static ResultCode ClearEvent32(Core::System& system, Handle handle) { +    return ClearEvent(system, handle); +} +  static ResultCode SignalEvent(Core::System& system, Handle handle) {      LOG_DEBUG(Kernel_SVC, "called. 
Handle=0x{:08X}", handle); @@ -1951,10 +2152,13 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {      }      writable_event->Signal(); -    system.PrepareReschedule();      return RESULT_SUCCESS;  } +static ResultCode SignalEvent32(Core::System& system, Handle handle) { +    return SignalEvent(system, handle); +} +  static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {      LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type); @@ -1982,6 +2186,7 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_  }  static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) { +    std::lock_guard lock{HLE::g_hle_lock};      LOG_DEBUG(Kernel_SVC, "called");      auto& kernel = system.Kernel(); @@ -2139,6 +2344,15 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd      return RESULT_SUCCESS;  } +static ResultCode FlushProcessDataCache32(Core::System& system, Handle handle, u32 address, +                                          u32 size) { +    // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a nope +    // as all emulation is done in the same cache level in host architecture, thus data cache +    // does not need flushing. 
+    LOG_DEBUG(Kernel_SVC, "called"); +    return RESULT_SUCCESS; +} +  namespace {  struct FunctionDef {      using Func = void(Core::System&); @@ -2153,57 +2367,57 @@ static const FunctionDef SVC_Table_32[] = {      {0x00, nullptr, "Unknown"},      {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"},      {0x02, nullptr, "Unknown"}, -    {0x03, nullptr, "SetMemoryAttribute32"}, -    {0x04, nullptr, "MapMemory32"}, -    {0x05, nullptr, "UnmapMemory32"}, +    {0x03, SvcWrap32<SetMemoryAttribute32>, "SetMemoryAttribute32"}, +    {0x04, SvcWrap32<MapMemory32>, "MapMemory32"}, +    {0x05, SvcWrap32<UnmapMemory32>, "UnmapMemory32"},      {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"}, -    {0x07, nullptr, "ExitProcess32"}, -    {0x08, nullptr, "CreateThread32"}, -    {0x09, nullptr, "StartThread32"}, -    {0x0a, nullptr, "ExitThread32"}, -    {0x0b, nullptr, "SleepThread32"}, +    {0x07, SvcWrap32<ExitProcess32>, "ExitProcess32"}, +    {0x08, SvcWrap32<CreateThread32>, "CreateThread32"}, +    {0x09, SvcWrap32<StartThread32>, "StartThread32"}, +    {0x0a, SvcWrap32<ExitThread32>, "ExitThread32"}, +    {0x0b, SvcWrap32<SleepThread32>, "SleepThread32"},      {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"}, -    {0x0d, nullptr, "SetThreadPriority32"}, -    {0x0e, nullptr, "GetThreadCoreMask32"}, -    {0x0f, nullptr, "SetThreadCoreMask32"}, -    {0x10, nullptr, "GetCurrentProcessorNumber32"}, -    {0x11, nullptr, "SignalEvent32"}, -    {0x12, nullptr, "ClearEvent32"}, -    {0x13, nullptr, "MapSharedMemory32"}, +    {0x0d, SvcWrap32<SetThreadPriority32>, "SetThreadPriority32"}, +    {0x0e, SvcWrap32<GetThreadCoreMask32>, "GetThreadCoreMask32"}, +    {0x0f, SvcWrap32<SetThreadCoreMask32>, "SetThreadCoreMask32"}, +    {0x10, SvcWrap32<GetCurrentProcessorNumber32>, "GetCurrentProcessorNumber32"}, +    {0x11, SvcWrap32<SignalEvent32>, "SignalEvent32"}, +    {0x12, SvcWrap32<ClearEvent32>, "ClearEvent32"}, +    {0x13, SvcWrap32<MapSharedMemory32>, 
"MapSharedMemory32"},      {0x14, nullptr, "UnmapSharedMemory32"}, -    {0x15, nullptr, "CreateTransferMemory32"}, +    {0x15, SvcWrap32<CreateTransferMemory32>, "CreateTransferMemory32"},      {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"}, -    {0x17, nullptr, "ResetSignal32"}, +    {0x17, SvcWrap32<ResetSignal32>, "ResetSignal32"},      {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"}, -    {0x19, nullptr, "CancelSynchronization32"}, -    {0x1a, nullptr, "ArbitrateLock32"}, -    {0x1b, nullptr, "ArbitrateUnlock32"}, -    {0x1c, nullptr, "WaitProcessWideKeyAtomic32"}, +    {0x19, SvcWrap32<CancelSynchronization32>, "CancelSynchronization32"}, +    {0x1a, SvcWrap32<ArbitrateLock32>, "ArbitrateLock32"}, +    {0x1b, SvcWrap32<ArbitrateUnlock32>, "ArbitrateUnlock32"}, +    {0x1c, SvcWrap32<WaitProcessWideKeyAtomic32>, "WaitProcessWideKeyAtomic32"},      {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"}, -    {0x1e, nullptr, "GetSystemTick32"}, +    {0x1e, SvcWrap32<GetSystemTick32>, "GetSystemTick32"},      {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"},      {0x20, nullptr, "Unknown"},      {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"},      {0x22, nullptr, "SendSyncRequestWithUserBuffer32"},      {0x23, nullptr, "Unknown"}, -    {0x24, nullptr, "GetProcessId32"}, +    {0x24, SvcWrap32<GetProcessId32>, "GetProcessId32"},      {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"}, -    {0x26, nullptr, "Break32"}, +    {0x26, SvcWrap32<Break32>, "Break32"},      {0x27, nullptr, "OutputDebugString32"},      {0x28, nullptr, "Unknown"},      {0x29, SvcWrap32<GetInfo32>, "GetInfo32"},      {0x2a, nullptr, "Unknown"},      {0x2b, nullptr, "Unknown"}, -    {0x2c, nullptr, "MapPhysicalMemory32"}, -    {0x2d, nullptr, "UnmapPhysicalMemory32"}, +    {0x2c, SvcWrap32<MapPhysicalMemory32>, "MapPhysicalMemory32"}, +    {0x2d, SvcWrap32<UnmapPhysicalMemory32>, "UnmapPhysicalMemory32"},      {0x2e, nullptr, "Unknown"},  
    {0x2f, nullptr, "Unknown"},      {0x30, nullptr, "Unknown"},      {0x31, nullptr, "Unknown"}, -    {0x32, nullptr, "SetThreadActivity32"}, -    {0x33, nullptr, "GetThreadContext32"}, -    {0x34, nullptr, "WaitForAddress32"}, -    {0x35, nullptr, "SignalToAddress32"}, +    {0x32, SvcWrap32<SetThreadActivity32>, "SetThreadActivity32"}, +    {0x33, SvcWrap32<GetThreadContext32>, "GetThreadContext32"}, +    {0x34, SvcWrap32<WaitForAddress32>, "WaitForAddress32"}, +    {0x35, SvcWrap32<SignalToAddress32>, "SignalToAddress32"},      {0x36, nullptr, "Unknown"},      {0x37, nullptr, "Unknown"},      {0x38, nullptr, "Unknown"}, @@ -2219,7 +2433,7 @@ static const FunctionDef SVC_Table_32[] = {      {0x42, nullptr, "Unknown"},      {0x43, nullptr, "ReplyAndReceive32"},      {0x44, nullptr, "Unknown"}, -    {0x45, nullptr, "CreateEvent32"}, +    {0x45, SvcWrap32<CreateEvent32>, "CreateEvent32"},      {0x46, nullptr, "Unknown"},      {0x47, nullptr, "Unknown"},      {0x48, nullptr, "Unknown"}, @@ -2245,7 +2459,7 @@ static const FunctionDef SVC_Table_32[] = {      {0x5c, nullptr, "Unknown"},      {0x5d, nullptr, "Unknown"},      {0x5e, nullptr, "Unknown"}, -    {0x5F, nullptr, "FlushProcessDataCache32"}, +    {0x5F, SvcWrap32<FlushProcessDataCache32>, "FlushProcessDataCache32"},      {0x60, nullptr, "Unknown"},      {0x61, nullptr, "Unknown"},      {0x62, nullptr, "Unknown"}, @@ -2423,13 +2637,10 @@ static const FunctionDef* GetSVCInfo64(u32 func_num) {      return &SVC_Table_64[func_num];  } -MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); -  void Call(Core::System& system, u32 immediate) { -    MICROPROFILE_SCOPE(Kernel_SVC); - -    // Lock the global kernel mutex when we enter the kernel HLE. -    std::lock_guard lock{HLE::g_hle_lock}; +    system.ExitDynarmicProfile(); +    auto& kernel = system.Kernel(); +    kernel.EnterSVCProfile();      const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? 
GetSVCInfo64(immediate)                                                                          : GetSVCInfo32(immediate); @@ -2442,6 +2653,9 @@ void Call(Core::System& system, u32 immediate) {      } else {          LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);      } + +    kernel.ExitSVCProfile(); +    system.EnterDynarmicProfile();  }  } // namespace Kernel::Svc diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 7d735e3fa..0b6dd9df0 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -350,13 +350,50 @@ void SvcWrap64(Core::System& system) {      func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));  } -// Used by QueryMemory32 +// Used by QueryMemory32, ArbitrateLock32  template <ResultCode func(Core::System&, u32, u32, u32)>  void SvcWrap32(Core::System& system) {      FuncReturn32(system,                   func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw);  } +// Used by Break32 +template <void func(Core::System&, u32, u32, u32)> +void SvcWrap32(Core::System& system) { +    func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)); +} + +// Used by ExitProcess32, ExitThread32 +template <void func(Core::System&)> +void SvcWrap32(Core::System& system) { +    func(system); +} + +// Used by GetCurrentProcessorNumber32 +template <u32 func(Core::System&)> +void SvcWrap32(Core::System& system) { +    FuncReturn32(system, func(system)); +} + +// Used by SleepThread32 +template <void func(Core::System&, u32, u32)> +void SvcWrap32(Core::System& system) { +    func(system, Param32(system, 0), Param32(system, 1)); +} + +// Used by CreateThread32 +template <ResultCode func(Core::System&, Handle*, u32, u32, u32, u32, s32)> +void SvcWrap32(Core::System& system) { +    Handle param_1 = 0; + +    const u32 retval = func(system, &param_1, Param32(system, 0), Param32(system, 1), +                            Param32(system, 
2), Param32(system, 3), Param32(system, 4)) +                           .raw; + +    system.CurrentArmInterface().SetReg(1, param_1); +    FuncReturn(system, retval); +} +  // Used by GetInfo32  template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)>  void SvcWrap32(Core::System& system) { @@ -393,18 +430,114 @@ void SvcWrap32(Core::System& system) {      FuncReturn(system, retval);  } +// Used by GetSystemTick32 +template <void func(Core::System&, u32*, u32*)> +void SvcWrap32(Core::System& system) { +    u32 param_1 = 0; +    u32 param_2 = 0; + +    func(system, &param_1, &param_2); +    system.CurrentArmInterface().SetReg(0, param_1); +    system.CurrentArmInterface().SetReg(1, param_2); +} + +// Used by CreateEvent32 +template <ResultCode func(Core::System&, Handle*, Handle*)> +void SvcWrap32(Core::System& system) { +    Handle param_1 = 0; +    Handle param_2 = 0; + +    const u32 retval = func(system, &param_1, &param_2).raw; +    system.CurrentArmInterface().SetReg(1, param_1); +    system.CurrentArmInterface().SetReg(2, param_2); +    FuncReturn(system, retval); +} + +// Used by GetThreadId32 +template <ResultCode func(Core::System&, Handle, u32*, u32*, u32*)> +void SvcWrap32(Core::System& system) { +    u32 param_1 = 0; +    u32 param_2 = 0; +    u32 param_3 = 0; + +    const u32 retval = func(system, Param32(system, 2), &param_1, &param_2, &param_3).raw; +    system.CurrentArmInterface().SetReg(1, param_1); +    system.CurrentArmInterface().SetReg(2, param_2); +    system.CurrentArmInterface().SetReg(3, param_3); +    FuncReturn(system, retval); +} +  // Used by SignalProcessWideKey32  template <void func(Core::System&, u32, s32)>  void SvcWrap32(Core::System& system) {      func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1)));  } -// Used by SendSyncRequest32 +// Used by SetThreadPriority32 +template <ResultCode func(Core::System&, Handle, u32)> +void SvcWrap32(Core::System& system) { +    const u32 retval = +        func(system, 
static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw; +    FuncReturn(system, retval); +} + +// Used by SetThreadCoreMask32 +template <ResultCode func(Core::System&, Handle, u32, u32, u32)> +void SvcWrap32(Core::System& system) { +    const u32 retval = +        func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1)), +             static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) +            .raw; +    FuncReturn(system, retval); +} + +// Used by WaitProcessWideKeyAtomic32 +template <ResultCode func(Core::System&, u32, u32, Handle, u32, u32)> +void SvcWrap32(Core::System& system) { +    const u32 retval = +        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), +             static_cast<Handle>(Param(system, 2)), static_cast<u32>(Param(system, 3)), +             static_cast<u32>(Param(system, 4))) +            .raw; +    FuncReturn(system, retval); +} + +// Used by WaitForAddress32 +template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)> +void SvcWrap32(Core::System& system) { +    const u32 retval = func(system, static_cast<u32>(Param(system, 0)), +                            static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)), +                            static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4))) +                           .raw; +    FuncReturn(system, retval); +} + +// Used by SignalToAddress32 +template <ResultCode func(Core::System&, u32, u32, s32, s32)> +void SvcWrap32(Core::System& system) { +    const u32 retval = +        func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)), +             static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) +            .raw; +    FuncReturn(system, retval); +} + +// Used by SendSyncRequest32, ArbitrateUnlock32  template <ResultCode func(Core::System&, u32)>  void SvcWrap32(Core::System& system) {  
    FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);  } +// Used by CreateTransferMemory32 +template <ResultCode func(Core::System&, Handle*, u32, u32, u32)> +void SvcWrap32(Core::System& system) { +    Handle handle = 0; +    const u32 retval = +        func(system, &handle, Param32(system, 1), Param32(system, 2), Param32(system, 3)).raw; +    system.CurrentArmInterface().SetReg(1, handle); +    FuncReturn(system, retval); +} +  // Used by WaitSynchronization32  template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>  void SvcWrap32(Core::System& system) { diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp index dc37fad1a..851b702a5 100644 --- a/src/core/hle/kernel/synchronization.cpp +++ b/src/core/hle/kernel/synchronization.cpp @@ -10,78 +10,107 @@  #include "core/hle/kernel/synchronization.h"  #include "core/hle/kernel/synchronization_object.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  namespace Kernel { -/// Default thread wakeup callback for WaitSynchronization -static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, -                                        std::shared_ptr<SynchronizationObject> object, -                                        std::size_t index) { -    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); - -    if (reason == ThreadWakeupReason::Timeout) { -        thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); -        return true; -    } - -    ASSERT(reason == ThreadWakeupReason::Signal); -    thread->SetWaitSynchronizationResult(RESULT_SUCCESS); -    thread->SetWaitSynchronizationOutput(static_cast<u32>(index)); -    return true; -} -  Synchronization::Synchronization(Core::System& system) : system{system} {}  void Synchronization::SignalObject(SynchronizationObject& obj) const { +    auto& kernel = system.Kernel(); +    SchedulerLock lock(kernel); +    auto& 
time_manager = kernel.TimeManager();      if (obj.IsSignaled()) { -        obj.WakeupAllWaitingThreads(); +        for (auto thread : obj.GetWaitingThreads()) { +            if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) { +                if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) { +                    ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch); +                    ASSERT(thread->IsWaitingSync()); +                } +                thread->SetSynchronizationResults(&obj, RESULT_SUCCESS); +                thread->ResumeFromWait(); +            } +        } +        obj.ClearWaitingThreads();      }  }  std::pair<ResultCode, Handle> Synchronization::WaitFor(      std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) { +    auto& kernel = system.Kernel();      auto* const thread = system.CurrentScheduler().GetCurrentThread(); -    // Find the first object that is acquirable in the provided list of objects -    const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(), -                                  [thread](const std::shared_ptr<SynchronizationObject>& object) { -                                      return object->IsSignaled(); -                                  }); - -    if (itr != sync_objects.end()) { -        // We found a ready object, acquire it and set the result value -        SynchronizationObject* object = itr->get(); -        object->Acquire(thread); -        const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); -        return {RESULT_SUCCESS, index}; +    Handle event_handle = InvalidHandle; +    { +        SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds); +        const auto itr = +            std::find_if(sync_objects.begin(), sync_objects.end(), +                         [thread](const std::shared_ptr<SynchronizationObject>& object) { +                             return object->IsSignaled(); +                     
    }); + +        if (itr != sync_objects.end()) { +            // We found a ready object, acquire it and set the result value +            SynchronizationObject* object = itr->get(); +            object->Acquire(thread); +            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); +            lock.CancelSleep(); +            return {RESULT_SUCCESS, index}; +        } + +        if (nano_seconds == 0) { +            lock.CancelSleep(); +            return {RESULT_TIMEOUT, InvalidHandle}; +        } + +        if (thread->IsPendingTermination()) { +            lock.CancelSleep(); +            return {ERR_THREAD_TERMINATING, InvalidHandle}; +        } + +        if (thread->IsSyncCancelled()) { +            thread->SetSyncCancelled(false); +            lock.CancelSleep(); +            return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; +        } + +        for (auto& object : sync_objects) { +            object->AddWaitingThread(SharedFrom(thread)); +        } + +        thread->SetSynchronizationObjects(&sync_objects); +        thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT); +        thread->SetStatus(ThreadStatus::WaitSynch); +        thread->SetWaitingSync(true);      } +    thread->SetWaitingSync(false); -    // No objects were ready to be acquired, prepare to suspend the thread. - -    // If a timeout value of 0 was provided, just return the Timeout error code instead of -    // suspending the thread. 
-    if (nano_seconds == 0) { -        return {RESULT_TIMEOUT, InvalidHandle}; +    if (event_handle != InvalidHandle) { +        auto& time_manager = kernel.TimeManager(); +        time_manager.UnscheduleTimeEvent(event_handle);      } -    if (thread->IsSyncCancelled()) { -        thread->SetSyncCancelled(false); -        return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle}; +    { +        SchedulerLock lock(kernel); +        ResultCode signaling_result = thread->GetSignalingResult(); +        SynchronizationObject* signaling_object = thread->GetSignalingObject(); +        thread->SetSynchronizationObjects(nullptr); +        auto shared_thread = SharedFrom(thread); +        for (auto& obj : sync_objects) { +            obj->RemoveWaitingThread(shared_thread); +        } +        if (signaling_object != nullptr) { +            const auto itr = std::find_if( +                sync_objects.begin(), sync_objects.end(), +                [signaling_object](const std::shared_ptr<SynchronizationObject>& object) { +                    return object.get() == signaling_object; +                }); +            ASSERT(itr != sync_objects.end()); +            signaling_object->Acquire(thread); +            const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr)); +            return {signaling_result, index}; +        } +        return {signaling_result, -1};      } - -    for (auto& object : sync_objects) { -        object->AddWaitingThread(SharedFrom(thread)); -    } - -    thread->SetSynchronizationObjects(std::move(sync_objects)); -    thread->SetStatus(ThreadStatus::WaitSynch); - -    // Create an event to wake the thread up after the specified nanosecond delay has passed -    thread->WakeAfterDelay(nano_seconds); -    thread->SetWakeupCallback(DefaultThreadWakeupCallback); - -    system.PrepareReschedule(thread->GetProcessorID()); - -    return {RESULT_TIMEOUT, InvalidHandle};  }  } // namespace Kernel diff --git 
a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp index 43f3eef18..ba4d39157 100644 --- a/src/core/hle/kernel/synchronization_object.cpp +++ b/src/core/hle/kernel/synchronization_object.cpp @@ -38,68 +38,8 @@ void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread)          waiting_threads.erase(itr);  } -std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const { -    Thread* candidate = nullptr; -    u32 candidate_priority = THREADPRIO_LOWEST + 1; - -    for (const auto& thread : waiting_threads) { -        const ThreadStatus thread_status = thread->GetStatus(); - -        // The list of waiting threads must not contain threads that are not waiting to be awakened. -        ASSERT_MSG(thread_status == ThreadStatus::WaitSynch || -                       thread_status == ThreadStatus::WaitHLEEvent, -                   "Inconsistent thread statuses in waiting_threads"); - -        if (thread->GetPriority() >= candidate_priority) -            continue; - -        if (ShouldWait(thread.get())) -            continue; - -        candidate = thread.get(); -        candidate_priority = thread->GetPriority(); -    } - -    return SharedFrom(candidate); -} - -void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) { -    ASSERT(!ShouldWait(thread.get())); - -    if (!thread) { -        return; -    } - -    if (thread->IsSleepingOnWait()) { -        for (const auto& object : thread->GetSynchronizationObjects()) { -            ASSERT(!object->ShouldWait(thread.get())); -            object->Acquire(thread.get()); -        } -    } else { -        Acquire(thread.get()); -    } - -    const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this)); - -    thread->ClearSynchronizationObjects(); - -    thread->CancelWakeupTimer(); - -    bool resume = true; -    if (thread->HasWakeupCallback()) { -        resume = 
thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, SharedFrom(this), -                                              index); -    } -    if (resume) { -        thread->ResumeFromWait(); -        kernel.PrepareReschedule(thread->GetProcessorID()); -    } -} - -void SynchronizationObject::WakeupAllWaitingThreads() { -    while (auto thread = GetHighestPriorityReadyThread()) { -        WakeupWaitingThread(thread); -    } +void SynchronizationObject::ClearWaitingThreads() { +    waiting_threads.clear();  }  const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const { diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h index 741c31faf..f89b24204 100644 --- a/src/core/hle/kernel/synchronization_object.h +++ b/src/core/hle/kernel/synchronization_object.h @@ -12,6 +12,7 @@  namespace Kernel {  class KernelCore; +class Synchronization;  class Thread;  /// Class that represents a Kernel object that a thread can be waiting on @@ -49,24 +50,11 @@ public:       */      void RemoveWaitingThread(std::shared_ptr<Thread> thread); -    /** -     * Wake up all threads waiting on this object that can be awoken, in priority order, -     * and set the synchronization result and output of the thread. -     */ -    void WakeupAllWaitingThreads(); - -    /** -     * Wakes up a single thread waiting on this object. -     * @param thread Thread that is waiting on this object to wakeup. -     */ -    void WakeupWaitingThread(std::shared_ptr<Thread> thread); - -    /// Obtains the highest priority thread that is ready to run from this object's waiting list. 
-    std::shared_ptr<Thread> GetHighestPriorityReadyThread() const; -      /// Get a const reference to the waiting threads list for debug use      const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const; +    void ClearWaitingThreads(); +  protected:      bool is_signaled{}; // Tells if this sync object is signalled; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index db7f379ac..2b1092697 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -9,12 +9,21 @@  #include "common/assert.h"  #include "common/common_types.h" +#include "common/fiber.h"  #include "common/logging/log.h"  #include "common/thread_queue_list.h"  #include "core/arm/arm_interface.h" +#ifdef ARCHITECTURE_x86_64 +#include "core/arm/dynarmic/arm_dynarmic_32.h" +#include "core/arm/dynarmic/arm_dynarmic_64.h" +#endif +#include "core/arm/cpu_interrupt_handler.h" +#include "core/arm/exclusive_monitor.h" +#include "core/arm/unicorn/arm_unicorn.h"  #include "core/core.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h" +#include "core/cpu_manager.h"  #include "core/hardware_properties.h"  #include "core/hle/kernel/errors.h"  #include "core/hle/kernel/handle_table.h" @@ -23,6 +32,7 @@  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  #include "core/hle/result.h"  #include "core/memory.h" @@ -44,46 +54,26 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}  Thread::~Thread() = default;  void Thread::Stop() { -    // Cancel any outstanding wakeup events for this thread -    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), -                                                             global_handle); -    kernel.GlobalHandleTable().Close(global_handle); -    global_handle = 0; -    SetStatus(ThreadStatus::Dead); -    Signal(); - -    // Clean 
up any dangling references in objects that this thread was waiting for -    for (auto& wait_object : wait_objects) { -        wait_object->RemoveWaitingThread(SharedFrom(this)); -    } -    wait_objects.clear(); - -    owner_process->UnregisterThread(this); - -    // Mark the TLS slot in the thread's page as free. -    owner_process->FreeTLSRegion(tls_address); -} - -void Thread::WakeAfterDelay(s64 nanoseconds) { -    // Don't schedule a wakeup if the thread wants to wait forever -    if (nanoseconds == -1) -        return; +    { +        SchedulerLock lock(kernel); +        SetStatus(ThreadStatus::Dead); +        Signal(); +        kernel.GlobalHandleTable().Close(global_handle); -    // This function might be called from any thread so we have to be cautious and use the -    // thread-safe version of ScheduleEvent. -    const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); -    Core::System::GetInstance().CoreTiming().ScheduleEvent( -        cycles, kernel.ThreadWakeupCallbackEventType(), global_handle); -} +        if (owner_process) { +            owner_process->UnregisterThread(this); -void Thread::CancelWakeupTimer() { -    Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), -                                                             global_handle); +            // Mark the TLS slot in the thread's page as free. 
+            owner_process->FreeTLSRegion(tls_address); +        } +        arm_interface.reset(); +        has_exited = true; +    } +    global_handle = 0;  }  void Thread::ResumeFromWait() { -    ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects"); - +    SchedulerLock lock(kernel);      switch (status) {      case ThreadStatus::Paused:      case ThreadStatus::WaitSynch: @@ -99,7 +89,7 @@ void Thread::ResumeFromWait() {      case ThreadStatus::Ready:          // The thread's wakeup callback must have already been cleared when the thread was first          // awoken. -        ASSERT(wakeup_callback == nullptr); +        ASSERT(hle_callback == nullptr);          // If the thread is waiting on multiple wait objects, it might be awoken more than once          // before actually resuming. We can ignore subsequent wakeups if the thread status has          // already been set to ThreadStatus::Ready. @@ -115,24 +105,31 @@ void Thread::ResumeFromWait() {          return;      } -    wakeup_callback = nullptr; +    SetStatus(ThreadStatus::Ready); +} + +void Thread::OnWakeUp() { +    SchedulerLock lock(kernel); -    if (activity == ThreadActivity::Paused) { -        SetStatus(ThreadStatus::Paused); -        return; -    } +    SetStatus(ThreadStatus::Ready); +} +ResultCode Thread::Start() { +    SchedulerLock lock(kernel);      SetStatus(ThreadStatus::Ready); +    return RESULT_SUCCESS;  }  void Thread::CancelWait() { -    if (GetSchedulingStatus() != ThreadSchedStatus::Paused) { +    SchedulerLock lock(kernel); +    if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {          is_sync_cancelled = true;          return;      } +    // TODO(Blinkhawk): Implement cancel of server session      is_sync_cancelled = false; -    SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); -    ResumeFromWait(); +    SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED); +    SetStatus(ThreadStatus::Ready);  }  
static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top, @@ -153,12 +150,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,      context.fpcr = 0;  } -ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name, -                                                  VAddr entry_point, u32 priority, u64 arg, -                                                  s32 processor_id, VAddr stack_top, -                                                  Process& owner_process) { +std::shared_ptr<Common::Fiber>& Thread::GetHostContext() { +    return host_context; +} + +ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags, +                                                  std::string name, VAddr entry_point, u32 priority, +                                                  u64 arg, s32 processor_id, VAddr stack_top, +                                                  Process* owner_process) { +    std::function<void(void*)> init_func = system.GetCpuManager().GetGuestThreadStartFunc(); +    void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater(); +    return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top, +                  owner_process, std::move(init_func), init_func_parameter); +} + +ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags, +                                                  std::string name, VAddr entry_point, u32 priority, +                                                  u64 arg, s32 processor_id, VAddr stack_top, +                                                  Process* owner_process, +                                                  std::function<void(void*)>&& thread_start_func, +                                                  void* thread_start_parameter) { +    auto& kernel = system.Kernel();      // Check if priority 
is in ranged. Lowest priority -> highest priority id. -    if (priority > THREADPRIO_LOWEST) { +    if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) {          LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);          return ERR_INVALID_THREAD_PRIORITY;      } @@ -168,11 +182,12 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin          return ERR_INVALID_PROCESSOR_ID;      } -    auto& system = Core::System::GetInstance(); -    if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) { -        LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); -        // TODO (bunnei): Find the correct error code to use here -        return RESULT_UNKNOWN; +    if (owner_process) { +        if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) { +            LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); +            // TODO (bunnei): Find the correct error code to use here +            return RESULT_UNKNOWN; +        }      }      std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel); @@ -183,51 +198,82 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin      thread->stack_top = stack_top;      thread->tpidr_el0 = 0;      thread->nominal_priority = thread->current_priority = priority; -    thread->last_running_ticks = system.CoreTiming().GetTicks(); +    thread->last_running_ticks = 0;      thread->processor_id = processor_id;      thread->ideal_core = processor_id;      thread->affinity_mask = 1ULL << processor_id; -    thread->wait_objects.clear(); +    thread->wait_objects = nullptr;      thread->mutex_wait_address = 0;      thread->condvar_wait_address = 0;      thread->wait_handle = 0;      thread->name = std::move(name);      thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap(); -    thread->owner_process = &owner_process; -    auto& scheduler 
= kernel.GlobalScheduler(); -    scheduler.AddThread(thread); -    thread->tls_address = thread->owner_process->CreateTLSRegion(); - -    thread->owner_process->RegisterThread(thread.get()); +    thread->owner_process = owner_process; +    thread->type = type_flags; +    if ((type_flags & THREADTYPE_IDLE) == 0) { +        auto& scheduler = kernel.GlobalScheduler(); +        scheduler.AddThread(thread); +    } +    if (owner_process) { +        thread->tls_address = thread->owner_process->CreateTLSRegion(); +        thread->owner_process->RegisterThread(thread.get()); +    } else { +        thread->tls_address = 0; +    } +    // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used +    // to initialize the context +    thread->arm_interface.reset(); +    if ((type_flags & THREADTYPE_HLE) == 0) { +#ifdef ARCHITECTURE_x86_64 +        if (owner_process && !owner_process->Is64BitProcess()) { +            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>( +                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), +                processor_id); +        } else { +            thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( +                system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(), +                processor_id); +        } -    ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), -                         static_cast<u32>(entry_point), static_cast<u32>(arg)); -    ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); +#else +        if (owner_process && !owner_process->Is64BitProcess()) { +            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>( +                system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32, +                processor_id); +        } else { +            thread->arm_interface = std::make_shared<Core::ARM_Unicorn>( +                
system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64, +                processor_id); +        } +        LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); +#endif +        ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top), +                             static_cast<u32>(entry_point), static_cast<u32>(arg)); +        ResetThreadContext64(thread->context_64, stack_top, entry_point, arg); +    } +    thread->host_context = +        std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);      return MakeResult<std::shared_ptr<Thread>>(std::move(thread));  }  void Thread::SetPriority(u32 priority) { +    SchedulerLock lock(kernel);      ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,                 "Invalid priority value.");      nominal_priority = priority;      UpdatePriority();  } -void Thread::SetWaitSynchronizationResult(ResultCode result) { -    context_32.cpu_registers[0] = result.raw; -    context_64.cpu_registers[0] = result.raw; -} - -void Thread::SetWaitSynchronizationOutput(s32 output) { -    context_32.cpu_registers[1] = output; -    context_64.cpu_registers[1] = output; +void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) { +    signaling_object = object; +    signaling_result = result;  }  s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { -    ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything"); -    const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object); -    return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); +    ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything"); +    const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object); +    return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);  }  VAddr 
Thread::GetCommandBufferAddress() const { @@ -236,6 +282,14 @@ VAddr Thread::GetCommandBufferAddress() const {      return GetTLSAddress() + command_header_offset;  } +Core::ARM_Interface& Thread::ArmInterface() { +    return *arm_interface; +} + +const Core::ARM_Interface& Thread::ArmInterface() const { +    return *arm_interface; +} +  void Thread::SetStatus(ThreadStatus new_status) {      if (new_status == status) {          return; @@ -257,10 +311,6 @@ void Thread::SetStatus(ThreadStatus new_status) {          break;      } -    if (status == ThreadStatus::Running) { -        last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); -    } -      status = new_status;  } @@ -341,75 +391,116 @@ void Thread::UpdatePriority() {      lock_owner->UpdatePriority();  } -void Thread::ChangeCore(u32 core, u64 mask) { -    SetCoreAndAffinityMask(core, mask); -} -  bool Thread::AllSynchronizationObjectsReady() const { -    return std::none_of(wait_objects.begin(), wait_objects.end(), +    return std::none_of(wait_objects->begin(), wait_objects->end(),                          [this](const std::shared_ptr<SynchronizationObject>& object) {                              return object->ShouldWait(this);                          });  } -bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, -                                  std::shared_ptr<SynchronizationObject> object, -                                  std::size_t index) { -    ASSERT(wakeup_callback); -    return wakeup_callback(reason, std::move(thread), std::move(object), index); +bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) { +    ASSERT(hle_callback); +    return hle_callback(std::move(thread));  } -void Thread::SetActivity(ThreadActivity value) { -    activity = value; +ResultCode Thread::SetActivity(ThreadActivity value) { +    SchedulerLock lock(kernel); + +    auto sched_status = GetSchedulingStatus(); + +    if (sched_status != 
ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) { +        return ERR_INVALID_STATE; +    } + +    if (IsPendingTermination()) { +        return RESULT_SUCCESS; +    }      if (value == ThreadActivity::Paused) { -        // Set status if not waiting -        if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { -            SetStatus(ThreadStatus::Paused); -            kernel.PrepareReschedule(processor_id); +        if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) != 0) { +            return ERR_INVALID_STATE; +        } +        AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag); +    } else { +        if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) == 0) { +            return ERR_INVALID_STATE;          } -    } else if (status == ThreadStatus::Paused) { -        // Ready to reschedule -        ResumeFromWait(); +        RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);      } +    return RESULT_SUCCESS;  } -void Thread::Sleep(s64 nanoseconds) { -    // Sleep current thread and check for next thread to schedule -    SetStatus(ThreadStatus::WaitSleep); +ResultCode Thread::Sleep(s64 nanoseconds) { +    Handle event_handle{}; +    { +        SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds); +        SetStatus(ThreadStatus::WaitSleep); +    } -    // Create an event to wake the thread up after the specified nanosecond delay has passed -    WakeAfterDelay(nanoseconds); +    if (event_handle != InvalidHandle) { +        auto& time_manager = kernel.TimeManager(); +        time_manager.UnscheduleTimeEvent(event_handle); +    } +    return RESULT_SUCCESS; +} + +std::pair<ResultCode, bool> Thread::YieldSimple() { +    bool is_redundant = false; +    { +        SchedulerLock lock(kernel); +        is_redundant = kernel.GlobalScheduler().YieldThread(this); +    } +    return {RESULT_SUCCESS, is_redundant}; +} + +std::pair<ResultCode, bool> 
Thread::YieldAndBalanceLoad() { +    bool is_redundant = false; +    { +        SchedulerLock lock(kernel); +        is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this); +    } +    return {RESULT_SUCCESS, is_redundant};  } -bool Thread::YieldSimple() { -    auto& scheduler = kernel.GlobalScheduler(); -    return scheduler.YieldThread(this); +std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() { +    bool is_redundant = false; +    { +        SchedulerLock lock(kernel); +        is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this); +    } +    return {RESULT_SUCCESS, is_redundant};  } -bool Thread::YieldAndBalanceLoad() { -    auto& scheduler = kernel.GlobalScheduler(); -    return scheduler.YieldThreadAndBalanceLoad(this); +void Thread::AddSchedulingFlag(ThreadSchedFlags flag) { +    const u32 old_state = scheduling_state; +    pausing_state |= static_cast<u32>(flag); +    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); +    scheduling_state = base_scheduling | pausing_state; +    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);  } -bool Thread::YieldAndWaitForLoadBalancing() { -    auto& scheduler = kernel.GlobalScheduler(); -    return scheduler.YieldThreadAndWaitForLoadBalancing(this); +void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) { +    const u32 old_state = scheduling_state; +    pausing_state &= ~static_cast<u32>(flag); +    const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus()); +    scheduling_state = base_scheduling | pausing_state; +    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);  }  void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { -    const u32 old_flags = scheduling_state; +    const u32 old_state = scheduling_state;      scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |                         static_cast<u32>(new_status); -    
AdjustSchedulingOnStatus(old_flags); +    kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);  }  void Thread::SetCurrentPriority(u32 new_priority) {      const u32 old_priority = std::exchange(current_priority, new_priority); -    AdjustSchedulingOnPriority(old_priority); +    kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority);  }  ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { +    SchedulerLock lock(kernel);      const auto HighestSetCore = [](u64 mask, u32 max_cores) {          for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) {              if (((mask >> core) & 1) != 0) { @@ -443,111 +534,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {                      processor_id = ideal_core;                  }              } -            AdjustSchedulingOnAffinity(old_affinity_mask, old_core); +            kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core);          }      }      return RESULT_SUCCESS;  } -void Thread::AdjustSchedulingOnStatus(u32 old_flags) { -    if (old_flags == scheduling_state) { -        return; -    } - -    auto& scheduler = kernel.GlobalScheduler(); -    if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) == -        ThreadSchedStatus::Runnable) { -        // In this case the thread was running, now it's pausing/exitting -        if (processor_id >= 0) { -            scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this); -        } - -        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { -                scheduler.Unsuggest(current_priority, core, this); -            } -        } -    } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { -        // The thread is now set to running from 
being stopped -        if (processor_id >= 0) { -            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); -        } - -        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -            if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { -                scheduler.Suggest(current_priority, core, this); -            } -        } -    } - -    scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnPriority(u32 old_priority) { -    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { -        return; -    } -    auto& scheduler = kernel.GlobalScheduler(); -    if (processor_id >= 0) { -        scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); -    } - -    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { -            scheduler.Unsuggest(old_priority, core, this); -        } -    } - -    // Add thread to the new priority queues. 
-    Thread* current_thread = GetCurrentThread(); - -    if (processor_id >= 0) { -        if (current_thread == this) { -            scheduler.SchedulePrepend(current_priority, static_cast<u32>(processor_id), this); -        } else { -            scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this); -        } -    } - -    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -        if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) { -            scheduler.Suggest(current_priority, core, this); -        } -    } - -    scheduler.SetReselectionPending(); -} - -void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { -    auto& scheduler = kernel.GlobalScheduler(); -    if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || -        current_priority >= THREADPRIO_COUNT) { -        return; -    } - -    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -        if (((old_affinity_mask >> core) & 1) != 0) { -            if (core == static_cast<u32>(old_core)) { -                scheduler.Unschedule(current_priority, core, this); -            } else { -                scheduler.Unsuggest(current_priority, core, this); -            } -        } -    } - -    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { -        if (((affinity_mask >> core) & 1) != 0) { -            if (core == static_cast<u32>(processor_id)) { -                scheduler.Schedule(current_priority, core, this); -            } else { -                scheduler.Suggest(current_priority, core, this); -            } -        } -    } - -    scheduler.SetReselectionPending(); -} -  ////////////////////////////////////////////////////////////////////////////////////////////////////  /** diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 23fdef8a4..c0342c462 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -6,26 +6,47 @@ 
 #include <functional>  #include <string> +#include <utility>  #include <vector>  #include "common/common_types.h" +#include "common/spin_lock.h"  #include "core/arm/arm_interface.h"  #include "core/hle/kernel/object.h"  #include "core/hle/kernel/synchronization_object.h"  #include "core/hle/result.h" +namespace Common { +class Fiber; +} + +namespace Core { +class ARM_Interface; +class System; +} // namespace Core +  namespace Kernel { +class GlobalScheduler;  class KernelCore;  class Process;  class Scheduler;  enum ThreadPriority : u32 { -    THREADPRIO_HIGHEST = 0,       ///< Highest thread priority -    THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps -    THREADPRIO_DEFAULT = 44,      ///< Default thread priority for userland apps -    THREADPRIO_LOWEST = 63,       ///< Lowest thread priority -    THREADPRIO_COUNT = 64,        ///< Total number of possible thread priorities. +    THREADPRIO_HIGHEST = 0,            ///< Highest thread priority +    THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration +    THREADPRIO_USERLAND_MAX = 24,      ///< Highest thread priority for userland apps +    THREADPRIO_DEFAULT = 44,           ///< Default thread priority for userland apps +    THREADPRIO_LOWEST = 63,            ///< Lowest thread priority +    THREADPRIO_COUNT = 64,             ///< Total number of possible thread priorities. 
+}; + +enum ThreadType : u32 { +    THREADTYPE_USER = 0x1, +    THREADTYPE_KERNEL = 0x2, +    THREADTYPE_HLE = 0x4, +    THREADTYPE_IDLE = 0x8, +    THREADTYPE_SUSPEND = 0x10,  };  enum ThreadProcessorId : s32 { @@ -107,26 +128,45 @@ public:      using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; -    using WakeupCallback = -        std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, -                           std::shared_ptr<SynchronizationObject> object, std::size_t index)>; +    using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>; + +    /** +     * Creates and returns a new thread. The new thread is immediately scheduled +     * @param system The instance of the whole system +     * @param name The friendly name desired for the thread +     * @param entry_point The address at which the thread should start execution +     * @param priority The thread's priority +     * @param arg User data to pass to the thread +     * @param processor_id The ID(s) of the processors on which the thread is desired to be run +     * @param stack_top The address of the thread's stack top +     * @param owner_process The parent process for the thread, if null, it's a kernel thread +     * @return A shared pointer to the newly created thread +     */ +    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, +                                                     std::string name, VAddr entry_point, +                                                     u32 priority, u64 arg, s32 processor_id, +                                                     VAddr stack_top, Process* owner_process);      /**       * Creates and returns a new thread. The new thread is immediately scheduled -     * @param kernel The kernel instance this thread will be created under. 
+     * @param system The instance of the whole system       * @param name The friendly name desired for the thread       * @param entry_point The address at which the thread should start execution       * @param priority The thread's priority       * @param arg User data to pass to the thread       * @param processor_id The ID(s) of the processors on which the thread is desired to be run       * @param stack_top The address of the thread's stack top -     * @param owner_process The parent process for the thread +     * @param owner_process The parent process for the thread, if null, it's a kernel thread +     * @param thread_start_func The function where the host context will start. +     * @param thread_start_parameter The parameter which will passed to host context on init       * @return A shared pointer to the newly created thread       */ -    static ResultVal<std::shared_ptr<Thread>> Create(KernelCore& kernel, std::string name, -                                                     VAddr entry_point, u32 priority, u64 arg, -                                                     s32 processor_id, VAddr stack_top, -                                                     Process& owner_process); +    static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags, +                                                     std::string name, VAddr entry_point, +                                                     u32 priority, u64 arg, s32 processor_id, +                                                     VAddr stack_top, Process* owner_process, +                                                     std::function<void(void*)>&& thread_start_func, +                                                     void* thread_start_parameter);      std::string GetName() const override {          return name; @@ -181,7 +221,7 @@ public:      void UpdatePriority();      /// Changes the core that the thread is running or scheduled to run on. 
-    void ChangeCore(u32 core, u64 mask); +    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);      /**       * Gets the thread's thread ID @@ -194,6 +234,10 @@ public:      /// Resumes a thread from waiting      void ResumeFromWait(); +    void OnWakeUp(); + +    ResultCode Start(); +      /// Cancels a waiting operation that this thread may or may not be within.      ///      /// When the thread is within a waiting state, this will set the thread's @@ -202,26 +246,19 @@ public:      ///      void CancelWait(); -    /** -     * Schedules an event to wake up the specified thread after the specified delay -     * @param nanoseconds The time this thread will be allowed to sleep for -     */ -    void WakeAfterDelay(s64 nanoseconds); +    void SetSynchronizationResults(SynchronizationObject* object, ResultCode result); -    /// Cancel any outstanding wakeup events for this thread -    void CancelWakeupTimer(); +    Core::ARM_Interface& ArmInterface(); -    /** -     * Sets the result after the thread awakens (from svcWaitSynchronization) -     * @param result Value to set to the returned result -     */ -    void SetWaitSynchronizationResult(ResultCode result); +    const Core::ARM_Interface& ArmInterface() const; -    /** -     * Sets the output parameter value after the thread awakens (from svcWaitSynchronization) -     * @param output Value to set to the output parameter -     */ -    void SetWaitSynchronizationOutput(s32 output); +    SynchronizationObject* GetSignalingObject() const { +        return signaling_object; +    } + +    ResultCode GetSignalingResult() const { +        return signaling_result; +    }      /**       * Retrieves the index that this particular object occupies in the list of objects @@ -269,11 +306,6 @@ public:       */      VAddr GetCommandBufferAddress() const; -    /// Returns whether this thread is waiting on objects from a WaitSynchronization call. 
-    bool IsSleepingOnWait() const { -        return status == ThreadStatus::WaitSynch; -    } -      ThreadContext32& GetContext32() {          return context_32;      } @@ -290,6 +322,28 @@ public:          return context_64;      } +    bool IsHLEThread() const { +        return (type & THREADTYPE_HLE) != 0; +    } + +    bool IsSuspendThread() const { +        return (type & THREADTYPE_SUSPEND) != 0; +    } + +    bool IsIdleThread() const { +        return (type & THREADTYPE_IDLE) != 0; +    } + +    bool WasRunning() const { +        return was_running; +    } + +    void SetWasRunning(bool value) { +        was_running = value; +    } + +    std::shared_ptr<Common::Fiber>& GetHostContext(); +      ThreadStatus GetStatus() const {          return status;      } @@ -325,18 +379,18 @@ public:      }      const ThreadSynchronizationObjects& GetSynchronizationObjects() const { -        return wait_objects; +        return *wait_objects;      } -    void SetSynchronizationObjects(ThreadSynchronizationObjects objects) { -        wait_objects = std::move(objects); +    void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) { +        wait_objects = objects;      }      void ClearSynchronizationObjects() { -        for (const auto& waiting_object : wait_objects) { +        for (const auto& waiting_object : *wait_objects) {              waiting_object->RemoveWaitingThread(SharedFrom(this));          } -        wait_objects.clear(); +        wait_objects->clear();      }      /// Determines whether all the objects this thread is waiting on are ready. 
@@ -386,26 +440,35 @@ public:          arb_wait_address = address;      } -    bool HasWakeupCallback() const { -        return wakeup_callback != nullptr; +    bool HasHLECallback() const { +        return hle_callback != nullptr;      } -    void SetWakeupCallback(WakeupCallback callback) { -        wakeup_callback = std::move(callback); +    void SetHLECallback(HLECallback callback) { +        hle_callback = std::move(callback);      } -    void InvalidateWakeupCallback() { -        SetWakeupCallback(nullptr); +    void SetHLETimeEvent(Handle time_event) { +        hle_time_event = time_event;      } -    /** -     * Invokes the thread's wakeup callback. -     * -     * @pre A valid wakeup callback has been set. Violating this precondition -     *      will cause an assertion to trigger. -     */ -    bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread, -                              std::shared_ptr<SynchronizationObject> object, std::size_t index); +    void SetHLESyncObject(SynchronizationObject* object) { +        hle_object = object; +    } + +    Handle GetHLETimeEvent() const { +        return hle_time_event; +    } + +    SynchronizationObject* GetHLESyncObject() const { +        return hle_object; +    } + +    void InvalidateHLECallback() { +        SetHLECallback(nullptr); +    } + +    bool InvokeHLECallback(std::shared_ptr<Thread> thread);      u32 GetIdealCore() const {          return ideal_core; @@ -415,23 +478,19 @@ public:          return affinity_mask;      } -    ThreadActivity GetActivity() const { -        return activity; -    } - -    void SetActivity(ThreadActivity value); +    ResultCode SetActivity(ThreadActivity value);      /// Sleeps this thread for the given amount of nanoseconds. -    void Sleep(s64 nanoseconds); +    ResultCode Sleep(s64 nanoseconds);      /// Yields this thread without rebalancing loads. 
-    bool YieldSimple(); +    std::pair<ResultCode, bool> YieldSimple();      /// Yields this thread and does a load rebalancing. -    bool YieldAndBalanceLoad(); +    std::pair<ResultCode, bool> YieldAndBalanceLoad();      /// Yields this thread and if the core is left idle, loads are rebalanced -    bool YieldAndWaitForLoadBalancing(); +    std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();      void IncrementYieldCount() {          yield_count++; @@ -446,6 +505,10 @@ public:                                                static_cast<u32>(ThreadSchedMasks::LowMask));      } +    bool IsRunnable() const { +        return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable); +    } +      bool IsRunning() const {          return is_running;      } @@ -466,17 +529,67 @@ public:          return global_handle;      } +    bool IsWaitingForArbitration() const { +        return waiting_for_arbitration; +    } + +    void WaitForArbitration(bool set) { +        waiting_for_arbitration = set; +    } + +    bool IsWaitingSync() const { +        return is_waiting_on_sync; +    } + +    void SetWaitingSync(bool is_waiting) { +        is_waiting_on_sync = is_waiting; +    } + +    bool IsPendingTermination() const { +        return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited; +    } + +    bool IsPaused() const { +        return pausing_state != 0; +    } + +    bool IsContinuousOnSVC() const { +        return is_continuous_on_svc; +    } + +    void SetContinuousOnSVC(bool is_continuous) { +        is_continuous_on_svc = is_continuous; +    } + +    bool IsPhantomMode() const { +        return is_phantom_mode; +    } + +    void SetPhantomMode(bool phantom) { +        is_phantom_mode = phantom; +    } + +    bool HasExited() const { +        return has_exited; +    } +  private: +    friend class GlobalScheduler; +    friend class Scheduler; +      void SetSchedulingStatus(ThreadSchedStatus new_status); +    void 
AddSchedulingFlag(ThreadSchedFlags flag); +    void RemoveSchedulingFlag(ThreadSchedFlags flag); +      void SetCurrentPriority(u32 new_priority); -    ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); -    void AdjustSchedulingOnStatus(u32 old_flags); -    void AdjustSchedulingOnPriority(u32 old_priority);      void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); +    Common::SpinLock context_guard{};      ThreadContext32 context_32{};      ThreadContext64 context_64{}; +    std::unique_ptr<Core::ARM_Interface> arm_interface{}; +    std::shared_ptr<Common::Fiber> host_context{};      u64 thread_id = 0; @@ -485,6 +598,8 @@ private:      VAddr entry_point = 0;      VAddr stack_top = 0; +    ThreadType type{}; +      /// Nominal thread priority, as set by the emulated application.      /// The nominal priority is the thread priority without priority      /// inheritance taken into account. @@ -509,7 +624,10 @@ private:      /// Objects that the thread is waiting on, in the same order as they were      /// passed to WaitSynchronization. -    ThreadSynchronizationObjects wait_objects; +    ThreadSynchronizationObjects* wait_objects{}; + +    SynchronizationObject* signaling_object{}; +    ResultCode signaling_result{RESULT_SUCCESS};      /// List of threads that are waiting for a mutex that is held by this thread.      MutexWaitingThreads wait_mutex_threads; @@ -526,30 +644,39 @@ private:      /// If waiting for an AddressArbiter, this is the address being waited on.      VAddr arb_wait_address{0}; +    bool waiting_for_arbitration{};      /// Handle used as userdata to reference this object when inserting into the CoreTiming queue.      Handle global_handle = 0; -    /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread -    /// was waiting via WaitSynchronization then the object will be the last object that became -    /// available. In case of a timeout, the object will be nullptr. 
-    WakeupCallback wakeup_callback; +    /// Callback for HLE Events +    HLECallback hle_callback; +    Handle hle_time_event; +    SynchronizationObject* hle_object;      Scheduler* scheduler = nullptr;      u32 ideal_core{0xFFFFFFFF};      u64 affinity_mask{0x1}; -    ThreadActivity activity = ThreadActivity::Normal; -      s32 ideal_core_override = -1;      u64 affinity_mask_override = 0x1;      u32 affinity_override_count = 0;      u32 scheduling_state = 0; +    u32 pausing_state = 0;      bool is_running = false; +    bool is_waiting_on_sync = false;      bool is_sync_cancelled = false; +    bool is_continuous_on_svc = false; + +    bool will_be_terminated = false; +    bool is_phantom_mode = false; +    bool has_exited = false; + +    bool was_running = false; +      std::string name;  }; diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp index 21b290468..941305e8e 100644 --- a/src/core/hle/kernel/time_manager.cpp +++ b/src/core/hle/kernel/time_manager.cpp @@ -8,30 +8,37 @@  #include "core/core_timing_util.h"  #include "core/hle/kernel/handle_table.h"  #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/thread.h"  #include "core/hle/kernel/time_manager.h"  namespace Kernel { -TimeManager::TimeManager(Core::System& system) : system{system} { +TimeManager::TimeManager(Core::System& system_) : system{system_} {      time_manager_event_type = Core::Timing::CreateEvent(          "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { +            SchedulerLock lock(system.Kernel());              Handle proper_handle = static_cast<Handle>(thread_handle); +            if (cancelled_events[proper_handle]) { +                return; +            }              std::shared_ptr<Thread> thread =                  this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); -            thread->ResumeFromWait(); +            
thread->OnWakeUp();          });  }  void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) { +    ASSERT(timetask); +    event_handle = timetask->GetGlobalHandle();      if (nanoseconds > 0) { -        ASSERT(timetask); -        event_handle = timetask->GetGlobalHandle(); -        const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); -        system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle); +        ASSERT(timetask->GetStatus() != ThreadStatus::Ready); +        ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex); +        system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle);      } else {          event_handle = InvalidHandle;      } +    cancelled_events[event_handle] = false;  }  void TimeManager::UnscheduleTimeEvent(Handle event_handle) { @@ -39,6 +46,12 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {          return;      }      system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle); +    cancelled_events[event_handle] = true; +} + +void TimeManager::CancelTimeEvent(Thread* time_task) { +    Handle event_handle = time_task->GetGlobalHandle(); +    UnscheduleTimeEvent(event_handle);  }  } // namespace Kernel diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h index eaec486d1..307a18765 100644 --- a/src/core/hle/kernel/time_manager.h +++ b/src/core/hle/kernel/time_manager.h @@ -5,6 +5,7 @@  #pragma once  #include <memory> +#include <unordered_map>  #include "core/hle/kernel/object.h" @@ -35,9 +36,12 @@ public:      /// Unschedule an existing time event      void UnscheduleTimeEvent(Handle event_handle); +    void CancelTimeEvent(Thread* time_task); +  private:      Core::System& system;      std::shared_ptr<Core::Timing::EventType> time_manager_event_type; +    std::unordered_map<Handle, bool> cancelled_events;  };  } // namespace Kernel diff --git 
a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index 630a8b048..8ac856ec3 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {      return static_cast<u32>(std::min(size, max_jpeg_image_size));  } +class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> { +public: +    explicit IManagerForSystemService(Common::UUID user_id) +        : ServiceFramework("IManagerForSystemService") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "CheckAvailability"}, +            {1, nullptr, "GetAccountId"}, +            {2, nullptr, "EnsureIdTokenCacheAsync"}, +            {3, nullptr, "LoadIdTokenCache"}, +            {100, nullptr, "SetSystemProgramIdentification"}, +            {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+ +            {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+ +            {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+ +            {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0 +            {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+ +            {120, nullptr, "GetNintendoAccountId"}, +            {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+ +            {130, nullptr, "GetNintendoAccountUserResourceCache"}, +            {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"}, +            {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"}, +            {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+ +            {134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+ +            {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+ +            {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+ +   
         {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+ +            {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+ +            {150, nullptr, "CreateAuthorizationRequest"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +// 3.0.0+ +class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> { +public: +    explicit IFloatingRegistrationRequest(Common::UUID user_id) +        : ServiceFramework("IFloatingRegistrationRequest") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSessionId"}, +            {12, nullptr, "GetAccountId"}, +            {13, nullptr, "GetLinkedNintendoAccountId"}, +            {14, nullptr, "GetNickname"}, +            {15, nullptr, "GetProfileImage"}, +            {21, nullptr, "LoadIdTokenCache"}, +            {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync +            {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync +            {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+ +            {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+ +            {110, nullptr, "SetSystemProgramIdentification"}, +            {111, nullptr, "EnsureIdTokenCacheAsync"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class IAdministrator final : public ServiceFramework<IAdministrator> { +public: +    explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "CheckAvailability"}, +            {1, nullptr, "GetAccountId"}, +            {2, nullptr, "EnsureIdTokenCacheAsync"}, +            {3, nullptr, "LoadIdTokenCache"}, +            {100, nullptr, "SetSystemProgramIdentification"}, +            {101, 
nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+ +            {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+ +            {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+ +            {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0 +            {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+ +            {120, nullptr, "GetNintendoAccountId"}, +            {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+ +            {130, nullptr, "GetNintendoAccountUserResourceCache"}, +            {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"}, +            {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"}, +            {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+ +            {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+ +            {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+ +            {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+ +            {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+ +            {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+ +            {150, nullptr, "CreateAuthorizationRequest"}, +            {200, nullptr, "IsRegistered"}, +            {201, nullptr, "RegisterAsync"}, +            {202, nullptr, "UnregisterAsync"}, +            {203, nullptr, "DeleteRegistrationInfoLocally"}, +            {220, nullptr, "SynchronizeProfileAsync"}, +            {221, nullptr, "UploadProfileAsync"}, +            {222, nullptr, "SynchronizeProfileAsyncIfSecondsElapsed"}, +            {250, nullptr, "IsLinkedWithNintendoAccount"}, +            {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"}, +            {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"}, +            {255, nullptr, 
"CreateProcedureToUpdateLinkageStateOfNintendoAccount"}, +            {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"}, +            {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+ +            {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+ +            {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"}, +            {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+ +            {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+ +            {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+ +            {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+ +            {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+ +            {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+ +            {997, nullptr, "DebugUnlinkNintendoAccountAsync"}, +            {998, nullptr, "DebugSetAvailabilityErrorDetail"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> { +public: +    explicit IAuthorizationRequest(Common::UUID user_id) +        : ServiceFramework("IAuthorizationRequest") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSessionId"}, +            {10, nullptr, "InvokeWithoutInteractionAsync"}, +            {19, nullptr, "IsAuthorized"}, +            {20, nullptr, "GetAuthorizationCode"}, +            {21, nullptr, "GetIdToken"}, +            {22, nullptr, "GetState"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> { +public: +    explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") { +        // clang-format off + 
       static const FunctionInfo functions[] = { +            {0, nullptr, "PrepareAsync"}, +            {1, nullptr, "GetRequest"}, +            {2, nullptr, "ApplyResponse"}, +            {3, nullptr, "ApplyResponseAsync"}, +            {10, nullptr, "Suspend"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +// 3.0.0+ +class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> { +public: +    explicit IOAuthProcedureForExternalNsa(Common::UUID user_id) +        : ServiceFramework("IOAuthProcedureForExternalNsa") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "PrepareAsync"}, +            {1, nullptr, "GetRequest"}, +            {2, nullptr, "ApplyResponse"}, +            {3, nullptr, "ApplyResponseAsync"}, +            {10, nullptr, "Suspend"}, +            {100, nullptr, "GetAccountId"}, +            {101, nullptr, "GetLinkedNintendoAccountId"}, +            {102, nullptr, "GetNickname"}, +            {103, nullptr, "GetProfileImage"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class IOAuthProcedureForNintendoAccountLinkage final +    : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> { +public: +    explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id) +        : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "PrepareAsync"}, +            {1, nullptr, "GetRequest"}, +            {2, nullptr, "ApplyResponse"}, +            {3, nullptr, "ApplyResponseAsync"}, +            {10, nullptr, "Suspend"}, +            {100, nullptr, "GetRequestWithTheme"}, +            {101, nullptr, "IsNetworkServiceAccountReplaced"}, +            {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0 +        }; +    
    // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class INotifier final : public ServiceFramework<INotifier> { +public: +    explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSystemEvent"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; +  class IProfileCommon : public ServiceFramework<IProfileCommon> {  public:      explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, @@ -226,6 +438,54 @@ public:          : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}  }; +class IAsyncContext final : public ServiceFramework<IAsyncContext> { +public: +    explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSystemEvent"}, +            {1, nullptr, "Cancel"}, +            {2, nullptr, "HasDone"}, +            {3, nullptr, "GetResult"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class ISessionObject final : public ServiceFramework<ISessionObject> { +public: +    explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {999, nullptr, "Dummy"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> { +public: +    explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSessionId"}, +            {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew) +  
          {12, nullptr, "GetAccountId"}, +            {13, nullptr, "GetLinkedNintendoAccountId"}, +            {14, nullptr, "GetNickname"}, +            {15, nullptr, "GetProfileImage"}, +            {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+ +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; +  class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {  public:      explicit IManagerForApplication(Common::UUID user_id) @@ -265,6 +525,87 @@ private:      Common::UUID user_id;  }; +// 6.0.0+ +class IAsyncNetworkServiceLicenseKindContext final +    : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> { +public: +    explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id) +        : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetSystemEvent"}, +            {1, nullptr, "Cancel"}, +            {2, nullptr, "HasDone"}, +            {3, nullptr, "GetResult"}, +            {4, nullptr, "GetNetworkServiceLicenseKind"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +// 8.0.0+ +class IOAuthProcedureForUserRegistration final +    : public ServiceFramework<IOAuthProcedureForUserRegistration> { +public: +    explicit IOAuthProcedureForUserRegistration(Common::UUID user_id) +        : ServiceFramework("IOAuthProcedureForUserRegistration") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "PrepareAsync"}, +            {1, nullptr, "GetRequest"}, +            {2, nullptr, "ApplyResponse"}, +            {3, nullptr, "ApplyResponseAsync"}, +            {10, nullptr, "Suspend"}, +            {100, nullptr, "GetAccountId"}, +            {101, nullptr, "GetLinkedNintendoAccountId"}, +            {102, nullptr, "GetNickname"}, +            {103, nullptr, 
"GetProfileImage"}, +            {110, nullptr, "RegisterUserAsync"}, +            {111, nullptr, "GetUid"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +class DAUTH_O final : public ServiceFramework<DAUTH_O> { +public: +    explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData +            {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+ +            {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+ +            {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+ +            {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+ +            {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+ +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; + +// 6.0.0+ +class IAsyncResult final : public ServiceFramework<IAsyncResult> { +public: +    explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") { +        // clang-format off +        static const FunctionInfo functions[] = { +            {0, nullptr, "GetResult"}, +            {1, nullptr, "Cancel"}, +            {2, nullptr, "IsAvailable"}, +            {3, nullptr, "GetSystemEvent"}, +        }; +        // clang-format on + +        RegisterHandlers(functions); +    } +}; +  void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {      LOG_DEBUG(Service_ACC, "called");      IPC::ResponseBuilder rb{ctx, 3}; @@ -435,6 +776,15 @@ void Module::Interface::ListQualifiedUsers(Kernel::HLERequestContext& ctx) {      rb.Push(RESULT_SUCCESS);  } +void Module::Interface::ListOpenContextStoredUsers(Kernel::HLERequestContext& ctx) { +    LOG_WARNING(Service_ACC, "(STUBBED) called"); + +    // TODO(ogniK): Handle open contexts +    ctx.WriteBuffer(profile_manager->GetOpenUsers()); +    IPC::ResponseBuilder 
rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} +  void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {      LOG_DEBUG(Service_ACC, "called");      // A u8 is passed into this function which we can safely ignore. It's to determine if we have diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index 74ca39d6e..d4c6395c6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -34,6 +34,7 @@ public:          void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx);          void GetProfileEditor(Kernel::HLERequestContext& ctx);          void ListQualifiedUsers(Kernel::HLERequestContext& ctx); +        void ListOpenContextStoredUsers(Kernel::HLERequestContext& ctx);      private:          ResultCode InitializeApplicationInfoBase(); diff --git a/src/core/hle/service/acc/acc_aa.cpp b/src/core/hle/service/acc/acc_aa.cpp index 3bac6bcd1..51f119b12 100644 --- a/src/core/hle/service/acc/acc_aa.cpp +++ b/src/core/hle/service/acc/acc_aa.cpp @@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p          {0, nullptr, "EnsureCacheAsync"},          {1, nullptr, "LoadCache"},          {2, nullptr, "GetDeviceAccountId"}, -        {50, nullptr, "RegisterNotificationTokenAsync"}, -        {51, nullptr, "UnregisterNotificationTokenAsync"}, +        {50, nullptr, "RegisterNotificationTokenAsync"},   // 1.0.0 - 6.2.0 +        {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0      };      RegisterHandlers(functions);  } diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 2eefc6df5..d2bb8c2c8 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p          {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},          {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"}, 
         {5, &ACC_SU::GetProfile, "GetProfile"}, -        {6, nullptr, "GetProfileDigest"}, +        {6, nullptr, "GetProfileDigest"}, // 3.0.0+          {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},          {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, -        {60, nullptr, "ListOpenContextStoredUsers"}, -        {99, nullptr, "DebugActivateOpenContextRetention"}, +        {60, &ACC_SU::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 +        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+          {100, nullptr, "GetUserRegistrationNotifier"},          {101, nullptr, "GetUserStateChangeNotifier"},          {102, nullptr, "GetBaasAccountManagerForSystemService"},          {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},          {104, nullptr, "GetProfileUpdateNotifier"}, -        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, -        {106, nullptr, "GetProfileSyncNotifier"}, +        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+ +        {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+          {110, nullptr, "StoreSaveDataThumbnail"},          {111, nullptr, "ClearSaveDataThumbnail"},          {112, nullptr, "LoadSaveDataThumbnail"}, -        {113, nullptr, "GetSaveDataThumbnailExistence"}, -        {120, nullptr, "ListOpenUsersInApplication"}, -        {130, nullptr, "ActivateOpenContextRetention"}, -        {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, -        {150, nullptr, "AuthenticateApplicationAsync"}, -        {190, nullptr, "GetUserLastOpenedApplication"}, -        {191, nullptr, "ActivateOpenContextHolder"}, +        {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+ +        {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+ +        {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+ +        {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 
6.0.0+ +        {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+ +        {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0 +        {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+          {200, nullptr, "BeginUserRegistration"},          {201, nullptr, "CompleteUserRegistration"},          {202, nullptr, "CancelUserRegistration"}, @@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p          {204, nullptr, "SetUserPosition"},          {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},          {206, nullptr, "CompleteUserRegistrationForcibly"}, -        {210, nullptr, "CreateFloatingRegistrationRequest"}, -        {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, -        {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, +        {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+ +        {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+ +        {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+          {230, nullptr, "AuthenticateServiceAsync"},          {250, nullptr, "GetBaasAccountAdministrator"},          {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"}, -        {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, +        {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+          {299, nullptr, "SuspendBackgroundDaemon"}, -        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, +        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+          {998, nullptr, "DebugSetUserStateClose"},          {999, nullptr, "DebugSetUserStateOpen"},      }; diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp index fb4e7e772..cb44e06b7 100644 --- a/src/core/hle/service/acc/acc_u0.cpp +++ b/src/core/hle/service/acc/acc_u0.cpp @@ -17,23 +17,23 @@ 
ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p          {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},          {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},          {5, &ACC_U0::GetProfile, "GetProfile"}, -        {6, nullptr, "GetProfileDigest"}, +        {6, nullptr, "GetProfileDigest"}, // 3.0.0+          {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},          {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, -        {60, nullptr, "ListOpenContextStoredUsers"}, -        {99, nullptr, "DebugActivateOpenContextRetention"}, +        {60, &ACC_U0::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 +        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+          {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},          {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},          {102, nullptr, "AuthenticateApplicationAsync"}, -        {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, +        {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+          {110, nullptr, "StoreSaveDataThumbnail"},          {111, nullptr, "ClearSaveDataThumbnail"},          {120, nullptr, "CreateGuestLoginRequest"}, -        {130, nullptr, "LoadOpenContext"}, -        {131, nullptr, "ListOpenContextStoredUsers"}, -        {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, -        {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, -        {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, +        {130, nullptr, "LoadOpenContext"}, // 5.0.0+ +        {131, &ACC_U0::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 6.0.0+ +        {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+ +        {141, 
&ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ +        {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+      };      // clang-format on diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp index 9f29cdc82..a4aa5316a 100644 --- a/src/core/hle/service/acc/acc_u1.cpp +++ b/src/core/hle/service/acc/acc_u1.cpp @@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p          {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},          {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},          {5, &ACC_U1::GetProfile, "GetProfile"}, -        {6, nullptr, "GetProfileDigest"}, +        {6, nullptr, "GetProfileDigest"}, // 3.0.0+          {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},          {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, -        {60, nullptr, "ListOpenContextStoredUsers"}, -        {99, nullptr, "DebugActivateOpenContextRetention"}, +        {60, &ACC_U1::ListOpenContextStoredUsers, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0 +        {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+          {100, nullptr, "GetUserRegistrationNotifier"},          {101, nullptr, "GetUserStateChangeNotifier"},          {102, nullptr, "GetBaasAccountManagerForSystemService"}, -        {103, nullptr, "GetProfileUpdateNotifier"}, -        {104, nullptr, "CheckNetworkServiceAvailabilityAsync"}, -        {105, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, -        {106, nullptr, "GetProfileSyncNotifier"}, +        {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, +        {104, nullptr, "GetProfileUpdateNotifier"}, +        {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+ +        {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+          {110, nullptr, "StoreSaveDataThumbnail"},          {111, nullptr, "ClearSaveDataThumbnail"},   
       {112, nullptr, "LoadSaveDataThumbnail"}, -        {113, nullptr, "GetSaveDataThumbnailExistence"}, -        {130, nullptr, "ActivateOpenContextRetention"}, -        {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, -        {150, nullptr, "AuthenticateApplicationAsync"}, -        {190, nullptr, "GetUserLastOpenedApplication"}, -        {191, nullptr, "ActivateOpenContextHolder"}, -        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, +        {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+ +        {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+ +        {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+ +        {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+ +        {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+ +        {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0 +        {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+ +        {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+          {998, nullptr, "DebugSetUserStateClose"},          {999, nullptr, "DebugSetUserStateOpen"},      }; diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 4df74c4f9..1bb544dd8 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -68,6 +68,7 @@ IWindowController::IWindowController(Core::System& system_)      static const FunctionInfo functions[] = {          {0, nullptr, "CreateWindow"},          {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"}, +        {2, nullptr, "GetAppletResourceUserIdOfCallerApplet"},          {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"},          {11, nullptr, "ReleaseForegroundRights"},          {12, nullptr, "RejectToChangeIntoBackground"}, @@ -189,8 +190,8 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"          {5, nullptr, "GetLastForegroundCaptureImageEx"},          {6, nullptr, 
"GetLastApplicationCaptureImageEx"},          {7, nullptr, "GetCallerAppletCaptureImageEx"}, -        {8, nullptr, "TakeScreenShotOfOwnLayer"},  // 2.0.0+ -        {9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+ +        {8, nullptr, "TakeScreenShotOfOwnLayer"}, +        {9, nullptr, "CopyBetweenCaptureBuffers"},          {10, nullptr, "AcquireLastApplicationCaptureBuffer"},          {11, nullptr, "ReleaseLastApplicationCaptureBuffer"},          {12, nullptr, "AcquireLastForegroundCaptureBuffer"}, @@ -200,17 +201,14 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"          {16, nullptr, "AcquireLastApplicationCaptureBufferEx"},          {17, nullptr, "AcquireLastForegroundCaptureBufferEx"},          {18, nullptr, "AcquireCallerAppletCaptureBufferEx"}, -        // 2.0.0+          {20, nullptr, "ClearCaptureBuffer"},          {21, nullptr, "ClearAppletTransitionBuffer"}, -        // 4.0.0+          {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"},          {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"},          {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"},          {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"},          {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"},          {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"}, -        // 6.0.0+          {28, nullptr, "TakeScreenShotOfOwnLayerEx"},      };      // clang-format on @@ -225,7 +223,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {      static const FunctionInfo functions[] = {          {0, nullptr, "NotifyMessageToHomeMenuForDebug"},          {1, nullptr, "OpenMainApplication"}, -        {10, nullptr, "EmulateButtonEvent"}, +        {10, nullptr, "PerformSystemButtonPressing"},          {20, nullptr, "InvalidateTransitionLayer"},          {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},          {40, nullptr, "GetAppletResourceUsageInfo"}, @@ -267,7 
+265,7 @@ ISelfController::ISelfController(Core::System& system,          {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"},          {17, nullptr, "SetControllerFirmwareUpdateSection"},          {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"}, -        {19, &ISelfController::SetScreenShotImageOrientation, "SetScreenShotImageOrientation"}, +        {19, &ISelfController::SetAlbumImageOrientation, "SetAlbumImageOrientation"},          {20, nullptr, "SetDesirableKeyboardLayout"},          {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"},          {41, nullptr, "IsSystemBufferSharingEnabled"}, @@ -443,7 +441,7 @@ void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext&      rb.Push(RESULT_SUCCESS);  } -void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { +void ISelfController::SetAlbumImageOrientation(Kernel::HLERequestContext& ctx) {      LOG_WARNING(Service_AM, "(STUBBED) called");      IPC::ResponseBuilder rb{ctx, 2}; @@ -607,6 +605,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,          {20, nullptr, "PushToGeneralChannel"},          {30, nullptr, "GetHomeButtonReaderLockAccessor"},          {31, nullptr, "GetReaderLockAccessorEx"}, +        {32, nullptr, "GetWriterLockAccessorEx"},          {40, nullptr, "GetCradleFwVersion"},          {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"},          {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"}, @@ -842,7 +841,7 @@ public:              {110, nullptr, "NeedsToExitProcess"},              {120, nullptr, "GetLibraryAppletInfo"},              {150, nullptr, "RequestForAppletToGetForeground"}, -            {160, nullptr, "GetIndirectLayerConsumerHandle"}, +            {160, &ILibraryAppletAccessor::GetIndirectLayerConsumerHandle, "GetIndirectLayerConsumerHandle"},          };          // clang-format on @@ -961,6 +960,18 @@ private:         
 rb.PushCopyObjects(applet->GetBroker().GetInteractiveDataEvent());      } +    void GetIndirectLayerConsumerHandle(Kernel::HLERequestContext& ctx) { +        LOG_WARNING(Service_AM, "(STUBBED) called"); + +        // We require a non-zero handle to be valid. Using 0xdeadbeef allows us to trace if this is +        // actually used anywhere +        constexpr u64 handle = 0xdeadbeef; + +        IPC::ResponseBuilder rb{ctx, 4}; +        rb.Push(RESULT_SUCCESS); +        rb.Push(handle); +    } +      std::shared_ptr<Applets::Applet> applet;  }; @@ -1132,6 +1143,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)          {24, nullptr, "GetLaunchStorageInfoForDebug"},          {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"},          {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"}, +        {27, nullptr, "CreateCacheStorage"},          {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"},          {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"},          {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"}, @@ -1157,6 +1169,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)          {120, nullptr, "ExecuteProgram"},          {121, nullptr, "ClearUserChannel"},          {122, nullptr, "UnpopToUserChannel"}, +        {123, nullptr, "GetPreviousProgramIndex"}, +        {124, nullptr, "EnableApplicationAllThreadDumpOnCrash"},          {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"},          {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},          {141, nullptr, "TryPopFromFriendInvitationStorageChannel"}, diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 469f7f814..2f69466ec 100644 --- 
a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -138,7 +138,7 @@ private:      void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);      void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);      void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx); -    void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); +    void SetAlbumImageOrientation(Kernel::HLERequestContext& ctx);      void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);      void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);      void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index 54e63c138..fbe3686ae 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp @@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(                                                                         config.sub_text.size());      params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),                                                                           config.guide_text.size()); -    params.initial_text = initial_text; +    params.initial_text = std::move(initial_text);      params.max_length = config.length_limit == 0 ? 
DEFAULT_MAX_LENGTH : config.length_limit;      params.password = static_cast<bool>(config.is_password);      params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position); @@ -60,7 +60,7 @@ void SoftwareKeyboard::Initialize() {      std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig));      const auto work_buffer_storage = broker.PopNormalDataToApplet(); -    ASSERT(work_buffer_storage != nullptr); +    ASSERT_OR_EXECUTE(work_buffer_storage != nullptr, { return; });      const auto& work_buffer = work_buffer_storage->GetData();      if (config.initial_string_size == 0) @@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {      const auto parameters = ConvertToFrontendParameters(config, initial_text); -    frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); }, +    frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },                           parameters);  } diff --git a/src/core/hle/service/am/spsm.cpp b/src/core/hle/service/am/spsm.cpp index 003ee8667..f27729ce7 100644 --- a/src/core/hle/service/am/spsm.cpp +++ b/src/core/hle/service/am/spsm.cpp @@ -10,17 +10,17 @@ SPSM::SPSM() : ServiceFramework{"spsm"} {      // clang-format off      static const FunctionInfo functions[] = {          {0, nullptr, "GetState"}, -        {1, nullptr, "SleepSystemAndWaitAwake"}, -        {2, nullptr, "Unknown1"}, -        {3, nullptr, "Unknown2"}, +        {1, nullptr, "EnterSleep"}, +        {2, nullptr, "GetLastWakeReason"}, +        {3, nullptr, "Shutdown"},          {4, nullptr, "GetNotificationMessageEventHandle"}, -        {5, nullptr, "Unknown3"}, -        {6, nullptr, "Unknown4"}, -        {7, nullptr, "Unknown5"}, +        {5, nullptr, "ReceiveNotificationMessage"}, +        {6, nullptr, "AnalyzeLogForLastSleepWakeSequence"}, +        {7, nullptr, "ResetEventLog"},          {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"},          {9, nullptr, 
"ChangeHomeButtonLongPressingTime"}, -        {10, nullptr, "Unknown6"}, -        {11, nullptr, "Unknown7"}, +        {10, nullptr, "PutErrorState"}, +        {11, nullptr, "InvalidateCurrentHomeButtonPressing"},      };      // clang-format on diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index 4227a4adf..8e79f707b 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -60,6 +60,7 @@ AOC_U::AOC_U(Core::System& system)          {6, nullptr, "PrepareAddOnContentByApplicationId"},          {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},          {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, +        {9, nullptr, "GetAddOnContentLostErrorCode"},          {100, nullptr, "CreateEcPurchasedEventManager"},          {101, nullptr, "CreatePermanentEcPurchasedEventManager"},      }; diff --git a/src/core/hle/service/bcat/bcat.cpp b/src/core/hle/service/bcat/bcat.cpp index 8bb2528c9..b31766212 100644 --- a/src/core/hle/service/bcat/bcat.cpp +++ b/src/core/hle/service/bcat/bcat.cpp @@ -14,6 +14,8 @@ BCAT::BCAT(Core::System& system, std::shared_ptr<Module> module,          {0, &BCAT::CreateBcatService, "CreateBcatService"},          {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"},          {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"}, +        {3, nullptr, "CreateDeliveryCacheProgressService"}, +        {4, nullptr, "CreateDeliveryCacheProgressServiceWithApplicationId"},      };      // clang-format on      RegisterHandlers(functions); diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 34aba7a27..603b64d4f 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp @@ -143,10 +143,13 @@ public:              {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},              
{20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},              {30100, &IBcatService::SetPassphrase, "SetPassphrase"}, +            {30101, nullptr, "Unknown"}, +            {30102, nullptr, "Unknown2"},              {30200, nullptr, "RegisterBackgroundDeliveryTask"},              {30201, nullptr, "UnregisterBackgroundDeliveryTask"},              {30202, nullptr, "BlockDeliveryTask"},              {30203, nullptr, "UnblockDeliveryTask"}, +            {30210, nullptr, "SetDeliveryTaskTimer"},              {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},              {90100, nullptr, "EnumerateBackgroundDeliveryTask"},              {90200, nullptr, "GetDeliveryList"}, diff --git a/src/core/hle/service/bpc/bpc.cpp b/src/core/hle/service/bpc/bpc.cpp index 1c1ecdb60..fac6b2f9c 100644 --- a/src/core/hle/service/bpc/bpc.cpp +++ b/src/core/hle/service/bpc/bpc.cpp @@ -23,9 +23,14 @@ public:              {5, nullptr, "GetBoardPowerControlEvent"},              {6, nullptr, "GetSleepButtonState"},              {7, nullptr, "GetPowerEvent"}, -            {8, nullptr, "Unknown1"}, -            {9, nullptr, "Unknown2"}, -            {10, nullptr, "Unknown3"}, +            {8, nullptr, "CreateWakeupTimer"}, +            {9, nullptr, "CancelWakeupTimer"}, +            {10, nullptr, "EnableWakeupTimerOnDevice"}, +            {11, nullptr, "CreateWakeupTimerEx"}, +            {12, nullptr, "GetLastEnabledWakeupTimerType"}, +            {13, nullptr, "CleanAllWakeupTimers"}, +            {14, nullptr, "Unknown"}, +            {15, nullptr, "Unknown2"},          };          // clang-format on @@ -38,10 +43,11 @@ public:      explicit BPC_R() : ServiceFramework{"bpc:r"} {          // clang-format off          static const FunctionInfo functions[] = { -            {0, nullptr, "GetExternalRtcValue"}, -            {1, nullptr, "SetExternalRtcValue"}, -            {2, nullptr, "ReadExternalRtcResetFlag"}, -            {3, nullptr, "ClearExternalRtcResetFlag"}, +         
   {0, nullptr, "GetRtcTime"}, +            {1, nullptr, "SetRtcTime"}, +            {2, nullptr, "GetRtcResetDetected"}, +            {3, nullptr, "ClearRtcResetDetected"}, +            {4, nullptr, "SetUpRtcResetOnShutdown"},          };          // clang-format on diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 40a06c9fd..f311afa2f 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp @@ -58,102 +58,103 @@ public:              {1, nullptr, "InitializeBluetooth"},              {2, nullptr, "EnableBluetooth"},              {3, nullptr, "DisableBluetooth"}, -            {4, nullptr, "CleanupBluetooth"}, +            {4, nullptr, "FinalizeBluetooth"},              {5, nullptr, "GetAdapterProperties"},              {6, nullptr, "GetAdapterProperty"},              {7, nullptr, "SetAdapterProperty"}, -            {8, nullptr, "StartDiscovery"}, -            {9, nullptr, "CancelDiscovery"}, +            {8, nullptr, "StartInquiry"}, +            {9, nullptr, "StopInquiry"},              {10, nullptr, "CreateBond"},              {11, nullptr, "RemoveBond"},              {12, nullptr, "CancelBond"}, -            {13, nullptr, "PinReply"}, -            {14, nullptr, "SspReply"}, +            {13, nullptr, "RespondToPinRequest"}, +            {14, nullptr, "RespondToSspRequest"},              {15, nullptr, "GetEventInfo"},              {16, nullptr, "InitializeHid"}, -            {17, nullptr, "HidConnect"}, -            {18, nullptr, "HidDisconnect"}, -            {19, nullptr, "HidSendData"}, -            {20, nullptr, "HidSendData2"}, -            {21, nullptr, "HidSetReport"}, -            {22, nullptr, "HidGetReport"}, -            {23, nullptr, "HidWakeController"}, -            {24, nullptr, "HidAddPairedDevice"}, -            {25, nullptr, "HidGetPairedDevice"}, -            {26, nullptr, "CleanupHid"}, -            {27, nullptr, "HidGetEventInfo"}, -            {28, nullptr, 
"ExtSetTsi"}, -            {29, nullptr, "ExtSetBurstMode"}, -            {30, nullptr, "ExtSetZeroRetran"}, -            {31, nullptr, "ExtSetMcMode"}, -            {32, nullptr, "ExtStartLlrMode"}, -            {33, nullptr, "ExtExitLlrMode"}, -            {34, nullptr, "ExtSetRadio"}, -            {35, nullptr, "ExtSetVisibility"}, -            {36, nullptr, "ExtSetTbfcScan"}, +            {17, nullptr, "OpenHidConnection"}, +            {18, nullptr, "CloseHidConnection"}, +            {19, nullptr, "WriteHidData"}, +            {20, nullptr, "WriteHidData2"}, +            {21, nullptr, "SetHidReport"}, +            {22, nullptr, "GetHidReport"}, +            {23, nullptr, "TriggerConnection"}, +            {24, nullptr, "AddPairedDeviceInfo"}, +            {25, nullptr, "GetPairedDeviceInfo"}, +            {26, nullptr, "FinalizeHid"}, +            {27, nullptr, "GetHidEventInfo"}, +            {28, nullptr, "SetTsi"}, +            {29, nullptr, "EnableBurstMode"}, +            {30, nullptr, "SetZeroRetransmission"}, +            {31, nullptr, "EnableMcMode"}, +            {32, nullptr, "EnableLlrScan"}, +            {33, nullptr, "DisableLlrScan"}, +            {34, nullptr, "EnableRadio"}, +            {35, nullptr, "SetVisibility"}, +            {36, nullptr, "EnableTbfcScan"},              {37, nullptr, "RegisterHidReportEvent"}, -            {38, nullptr, "HidGetReportEventInfo"}, +            {38, nullptr, "GetHidReportEventInfo"},              {39, nullptr, "GetLatestPlr"}, -            {40, nullptr, "ExtGetPendingConnections"}, +            {40, nullptr, "GetPendingConnections"},              {41, nullptr, "GetChannelMap"}, -            {42, nullptr, "EnableBluetoothBoostSetting"}, -            {43, nullptr, "IsBluetoothBoostSettingEnabled"}, -            {44, nullptr, "EnableBluetoothAfhSetting"}, -            {45, nullptr, "IsBluetoothAfhSettingEnabled"}, -            {46, nullptr, "InitializeBluetoothLe"}, -            {47, nullptr, 
"EnableBluetoothLe"}, -            {48, nullptr, "DisableBluetoothLe"}, -            {49, nullptr, "CleanupBluetoothLe"}, -            {50, nullptr, "SetLeVisibility"}, -            {51, nullptr, "SetLeConnectionParameter"}, -            {52, nullptr, "SetLeDefaultConnectionParameter"}, -            {53, nullptr, "SetLeAdvertiseData"}, -            {54, nullptr, "SetLeAdvertiseParameter"}, -            {55, nullptr, "StartLeScan"}, -            {56, nullptr, "StopLeScan"}, -            {57, nullptr, "AddLeScanFilterCondition"}, -            {58, nullptr, "DeleteLeScanFilterCondition"}, -            {59, nullptr, "DeleteLeScanFilter"}, -            {60, nullptr, "ClearLeScanFilters"}, -            {61, nullptr, "EnableLeScanFilter"}, -            {62, nullptr, "RegisterLeClient"}, -            {63, nullptr, "UnregisterLeClient"}, -            {64, nullptr, "UnregisterLeClientAll"}, -            {65, nullptr, "LeClientConnect"}, -            {66, nullptr, "LeClientCancelConnection"}, -            {67, nullptr, "LeClientDisconnect"}, -            {68, nullptr, "LeClientGetAttributes"}, -            {69, nullptr, "LeClientDiscoverService"}, -            {70, nullptr, "LeClientConfigureMtu"}, -            {71, nullptr, "RegisterLeServer"}, -            {72, nullptr, "UnregisterLeServer"}, -            {73, nullptr, "LeServerConnect"}, -            {74, nullptr, "LeServerDisconnect"}, -            {75, nullptr, "CreateLeService"}, -            {76, nullptr, "StartLeService"}, -            {77, nullptr, "AddLeCharacteristic"}, -            {78, nullptr, "AddLeDescriptor"}, -            {79, nullptr, "GetLeCoreEventInfo"}, -            {80, nullptr, "LeGetFirstCharacteristic"}, -            {81, nullptr, "LeGetNextCharacteristic"}, -            {82, nullptr, "LeGetFirstDescriptor"}, -            {83, nullptr, "LeGetNextDescriptor"}, -            {84, nullptr, "RegisterLeCoreDataPath"}, -            {85, nullptr, "UnregisterLeCoreDataPath"}, -            {86, nullptr, 
"RegisterLeHidDataPath"}, -            {87, nullptr, "UnregisterLeHidDataPath"}, -            {88, nullptr, "RegisterLeDataPath"}, -            {89, nullptr, "UnregisterLeDataPath"}, -            {90, nullptr, "LeClientReadCharacteristic"}, -            {91, nullptr, "LeClientReadDescriptor"}, -            {92, nullptr, "LeClientWriteCharacteristic"}, -            {93, nullptr, "LeClientWriteDescriptor"}, -            {94, nullptr, "LeClientRegisterNotification"}, -            {95, nullptr, "LeClientDeregisterNotification"}, +            {42, nullptr, "EnableTxPowerBoostSetting"}, +            {43, nullptr, "IsTxPowerBoostSettingEnabled"}, +            {44, nullptr, "EnableAfhSetting"}, +            {45, nullptr, "IsAfhSettingEnabled"}, +            {46, nullptr, "InitializeBle"}, +            {47, nullptr, "EnableBle"}, +            {48, nullptr, "DisableBle"}, +            {49, nullptr, "FinalizeBle"}, +            {50, nullptr, "SetBleVisibility"}, +            {51, nullptr, "SetBleConnectionParameter"}, +            {52, nullptr, "SetBleDefaultConnectionParameter"}, +            {53, nullptr, "SetBleAdvertiseData"}, +            {54, nullptr, "SetBleAdvertiseParameter"}, +            {55, nullptr, "StartBleScan"}, +            {56, nullptr, "StopBleScan"}, +            {57, nullptr, "AddBleScanFilterCondition"}, +            {58, nullptr, "DeleteBleScanFilterCondition"}, +            {59, nullptr, "DeleteBleScanFilter"}, +            {60, nullptr, "ClearBleScanFilters"}, +            {61, nullptr, "EnableBleScanFilter"}, +            {62, nullptr, "RegisterGattClient"}, +            {63, nullptr, "UnregisterGattClient"}, +            {64, nullptr, "UnregisterAllGattClients"}, +            {65, nullptr, "ConnectGattServer"}, +            {66, nullptr, "CancelConnectGattServer"}, +            {67, nullptr, "DisconnectGattServer"}, +            {68, nullptr, "GetGattAttribute"}, +            {69, nullptr, "GetGattService"}, +            {70, nullptr, 
"ConfigureAttMtu"}, +            {71, nullptr, "RegisterGattServer"}, +            {72, nullptr, "UnregisterGattServer"}, +            {73, nullptr, "ConnectGattClient"}, +            {74, nullptr, "DisconnectGattClient"}, +            {75, nullptr, "AddGattService"}, +            {76, nullptr, "EnableGattService"}, +            {77, nullptr, "AddGattCharacteristic"}, +            {78, nullptr, "AddGattDescriptor"}, +            {79, nullptr, "GetBleManagedEventInfo"}, +            {80, nullptr, "GetGattFirstCharacteristic"}, +            {81, nullptr, "GetGattNextCharacteristic"}, +            {82, nullptr, "GetGattFirstDescriptor"}, +            {83, nullptr, "GetGattNextDescriptor"}, +            {84, nullptr, "RegisterGattManagedDataPath"}, +            {85, nullptr, "UnregisterGattManagedDataPath"}, +            {86, nullptr, "RegisterGattHidDataPath"}, +            {87, nullptr, "UnregisterGattHidDataPath"}, +            {88, nullptr, "RegisterGattDataPath"}, +            {89, nullptr, "UnregisterGattDataPath"}, +            {90, nullptr, "ReadGattCharacteristic"}, +            {91, nullptr, "ReadGattDescriptor"}, +            {92, nullptr, "WriteGattCharacteristic"}, +            {93, nullptr, "WriteGattDescriptor"}, +            {94, nullptr, "RegisterGattNotification"}, +            {95, nullptr, "UnregisterGattNotification"},              {96, nullptr, "GetLeHidEventInfo"},              {97, nullptr, "RegisterBleHidEvent"}, -            {98, nullptr, "SetLeScanParameter"}, -            {256, nullptr, "GetIsManufacturingMode"}, +            {98, nullptr, "SetBleScanParameter"}, +            {99, nullptr, "MoveToSecondaryPiconet"}, +            {256, nullptr, "IsManufacturingMode"},              {257, nullptr, "EmulateBluetoothCrash"},              {258, nullptr, "GetBleChannelMap"},          }; diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp index 251b3c9df..0d251c6d0 100644 --- a/src/core/hle/service/btm/btm.cpp +++ 
b/src/core/hle/service/btm/btm.cpp @@ -132,66 +132,71 @@ public:      explicit BTM() : ServiceFramework{"btm"} {          // clang-format off          static const FunctionInfo functions[] = { -            {0, nullptr, "Unknown1"}, -            {1, nullptr, "Unknown2"}, -            {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"}, -            {3, nullptr, "Unknown3"}, -            {4, nullptr, "Unknown4"}, -            {5, nullptr, "Unknown5"}, -            {6, nullptr, "Unknown6"}, -            {7, nullptr, "Unknown7"}, -            {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"}, -            {9, nullptr, "Unknown8"}, -            {10, nullptr, "Unknown9"}, -            {11, nullptr, "Unknown10"}, -            {12, nullptr, "Unknown11"}, -            {13, nullptr, "Unknown12"}, +            {0, nullptr, "GetState"}, +            {1, nullptr, "GetHostDeviceProperty"}, +            {2, nullptr, "AcquireDeviceConditionEvent"}, +            {3, nullptr, "GetDeviceCondition"}, +            {4, nullptr, "SetBurstMode"}, +            {5, nullptr, "SetSlotMode"}, +            {6, nullptr, "SetBluetoothMode"}, +            {7, nullptr, "SetWlanMode"}, +            {8, nullptr, "AcquireDeviceInfoEvent"}, +            {9, nullptr, "GetDeviceInfo"}, +            {10, nullptr, "AddDeviceInfo"}, +            {11, nullptr, "RemoveDeviceInfo"}, +            {12, nullptr, "IncreaseDeviceInfoOrder"}, +            {13, nullptr, "LlrNotify"},              {14, nullptr, "EnableRadio"},              {15, nullptr, "DisableRadio"}, -            {16, nullptr, "Unknown13"}, -            {17, nullptr, "Unknown14"}, -            {18, nullptr, "Unknown15"}, -            {19, nullptr, "Unknown16"}, -            {20, nullptr, "Unknown17"}, -            {21, nullptr, "Unknown18"}, -            {22, nullptr, "Unknown19"}, -            {23, nullptr, "Unknown20"}, -            {24, nullptr, "Unknown21"}, -            {25, nullptr, "Unknown22"}, -            {26, nullptr, 
"Unknown23"}, -            {27, nullptr, "Unknown24"}, -            {28, nullptr, "Unknown25"}, -            {29, nullptr, "Unknown26"}, -            {30, nullptr, "Unknown27"}, -            {31, nullptr, "Unknown28"}, -            {32, nullptr, "Unknown29"}, -            {33, nullptr, "Unknown30"}, -            {34, nullptr, "Unknown31"}, -            {35, nullptr, "Unknown32"}, -            {36, nullptr, "Unknown33"}, -            {37, nullptr, "Unknown34"}, -            {38, nullptr, "Unknown35"}, -            {39, nullptr, "Unknown36"}, -            {40, nullptr, "Unknown37"}, -            {41, nullptr, "Unknown38"}, -            {42, nullptr, "Unknown39"}, -            {43, nullptr, "Unknown40"}, -            {44, nullptr, "Unknown41"}, -            {45, nullptr, "Unknown42"}, -            {46, nullptr, "Unknown43"}, -            {47, nullptr, "Unknown44"}, -            {48, nullptr, "Unknown45"}, -            {49, nullptr, "Unknown46"}, -            {50, nullptr, "Unknown47"}, -            {51, nullptr, "Unknown48"}, -            {52, nullptr, "Unknown49"}, -            {53, nullptr, "Unknown50"}, -            {54, nullptr, "Unknown51"}, -            {55, nullptr, "Unknown52"}, -            {56, nullptr, "Unknown53"}, -            {57, nullptr, "Unknown54"}, -            {58, nullptr, "Unknown55"}, -            {59, nullptr, "Unknown56"}, +            {16, nullptr, "HidDisconnect"}, +            {17, nullptr, "HidSetRetransmissionMode"}, +            {18, nullptr, "AcquireAwakeReqEvent"}, +            {19, nullptr, "AcquireLlrStateEvent"}, +            {20, nullptr, "IsLlrStarted"}, +            {21, nullptr, "EnableSlotSaving"}, +            {22, nullptr, "ProtectDeviceInfo"}, +            {23, nullptr, "AcquireBleScanEvent"}, +            {24, nullptr, "GetBleScanParameterGeneral"}, +            {25, nullptr, "GetBleScanParameterSmartDevice"}, +            {26, nullptr, "StartBleScanForGeneral"}, +            {27, nullptr, "StopBleScanForGeneral"}, +        
    {28, nullptr, "GetBleScanResultsForGeneral"}, +            {29, nullptr, "StartBleScanForPairedDevice"}, +            {30, nullptr, "StopBleScanForPairedDevice"}, +            {31, nullptr, "StartBleScanForSmartDevice"}, +            {32, nullptr, "StopBleScanForSmartDevice"}, +            {33, nullptr, "GetBleScanResultsForSmartDevice"}, +            {34, nullptr, "AcquireBleConnectionEvent"}, +            {35, nullptr, "BleConnect"}, +            {36, nullptr, "BleOverrideConnection"}, +            {37, nullptr, "BleDisconnect"}, +            {38, nullptr, "BleGetConnectionState"}, +            {39, nullptr, "BleGetGattClientConditionList"}, +            {40, nullptr, "AcquireBlePairingEvent"}, +            {41, nullptr, "BlePairDevice"}, +            {42, nullptr, "BleUnpairDeviceOnBoth"}, +            {43, nullptr, "BleUnpairDevice"}, +            {44, nullptr, "BleGetPairedAddresses"}, +            {45, nullptr, "AcquireBleServiceDiscoveryEvent"}, +            {46, nullptr, "GetGattServices"}, +            {47, nullptr, "GetGattService"}, +            {48, nullptr, "GetGattIncludedServices"}, +            {49, nullptr, "GetBelongingService"}, +            {50, nullptr, "GetGattCharacteristics"}, +            {51, nullptr, "GetGattDescriptors"}, +            {52, nullptr, "AcquireBleMtuConfigEvent"}, +            {53, nullptr, "ConfigureBleMtu"}, +            {54, nullptr, "GetBleMtu"}, +            {55, nullptr, "RegisterBleGattDataPath"}, +            {56, nullptr, "UnregisterBleGattDataPath"}, +            {57, nullptr, "RegisterAppletResourceUserId"}, +            {58, nullptr, "UnregisterAppletResourceUserId"}, +            {59, nullptr, "SetAppletResourceUserId"}, +            {60, nullptr, "Unknown60"}, +            {61, nullptr, "Unknown61"}, +            {62, nullptr, "Unknown62"}, +            {63, nullptr, "Unknown63"}, +            {64, nullptr, "Unknown64"},          };          // clang-format on @@ -204,19 +209,19 @@ public:      explicit 
BTM_DBG() : ServiceFramework{"btm:dbg"} {          // clang-format off          static const FunctionInfo functions[] = { -            {0, nullptr, "RegisterSystemEventForDiscovery"}, -            {1, nullptr, "Unknown1"}, -            {2, nullptr, "Unknown2"}, -            {3, nullptr, "Unknown3"}, -            {4, nullptr, "Unknown4"}, -            {5, nullptr, "Unknown5"}, -            {6, nullptr, "Unknown6"}, -            {7, nullptr, "Unknown7"}, -            {8, nullptr, "Unknown8"}, -            {9, nullptr, "Unknown9"}, -            {10, nullptr, "Unknown10"}, -            {11, nullptr, "Unknown11"}, -            {12, nullptr, "Unknown11"}, +            {0, nullptr, "AcquireDiscoveryEvent"}, +            {1, nullptr, "StartDiscovery"}, +            {2, nullptr, "CancelDiscovery"}, +            {3, nullptr, "GetDeviceProperty"}, +            {4, nullptr, "CreateBond"}, +            {5, nullptr, "CancelBond"}, +            {6, nullptr, "SetTsiMode"}, +            {7, nullptr, "GeneralTest"}, +            {8, nullptr, "HidConnect"}, +            {9, nullptr, "GeneralGet"}, +            {10, nullptr, "GetGattClientDisconnectionReason"}, +            {11, nullptr, "GetBleConnectionParameter"}, +            {12, nullptr, "GetBleConnectionParameterRequest"},          };          // clang-format on diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp index 26c8a7081..ba5749b84 100644 --- a/src/core/hle/service/caps/caps.cpp +++ b/src/core/hle/service/caps/caps.cpp @@ -1,4 +1,4 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2018 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h index fc70a4c27..b8c67b6e2 100644 --- a/src/core/hle/service/caps/caps.h +++ b/src/core/hle/service/caps/caps.h @@ -1,4 +1,4 @@ -// Copyright 2018 yuzu emulator team +// Copyright 2018 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. @@ -12,73 +12,79 @@ class ServiceManager;  namespace Service::Capture { -enum AlbumImageOrientation { +enum class AlbumImageOrientation {      Orientation0 = 0,      Orientation1 = 1,      Orientation2 = 2,      Orientation3 = 3,  }; -enum AlbumReportOption { +enum class AlbumReportOption {      Disable = 0,      Enable = 1,  }; -enum ContentType : u8 { +enum class ContentType : u8 {      Screenshot = 0,      Movie = 1,      ExtraMovie = 3,  }; -enum AlbumStorage : u8 { +enum class AlbumStorage : u8 {      NAND = 0,      SD = 1,  };  struct AlbumFileDateTime { -    u16 year; -    u8 month; -    u8 day; -    u8 hour; -    u8 minute; -    u8 second; -    u8 uid; +    s16 year{}; +    s8 month{}; +    s8 day{}; +    s8 hour{}; +    s8 minute{}; +    s8 second{}; +    s8 uid{};  }; +static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size.");  struct AlbumEntry { -    u64 size; -    u64 application_id; -    AlbumFileDateTime datetime; -    AlbumStorage storage; -    ContentType content; -    u8 padding[6]; +    u64 size{}; +    u64 application_id{}; +    AlbumFileDateTime datetime{}; +    AlbumStorage storage{}; +    ContentType content{}; +    INSERT_PADDING_BYTES(6);  }; +static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size.");  struct AlbumFileEntry { -    u64 size; -    u64 hash; -    AlbumFileDateTime datetime; -    AlbumStorage storage; -    ContentType content; -    u8 padding[5]; -    u8 unknown; +    u64 size{}; // Size of the entry +    u64 hash{}; // AES256 with hardcoded key over AlbumEntry +    AlbumFileDateTime datetime{}; +    
AlbumStorage storage{}; +    ContentType content{}; +    INSERT_PADDING_BYTES(5); +    u8 unknown{1}; // Set to 1 on official SW  }; +static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size.");  struct ApplicationAlbumEntry { -    u64 size; -    u64 hash; -    AlbumFileDateTime datetime; -    AlbumStorage storage; -    ContentType content; -    u8 padding[5]; -    u8 unknown; +    u64 size{}; // Size of the entry +    u64 hash{}; // AES256 with hardcoded key over AlbumEntry +    AlbumFileDateTime datetime{}; +    AlbumStorage storage{}; +    ContentType content{}; +    INSERT_PADDING_BYTES(5); +    u8 unknown{1}; // Set to 1 on official SW  }; +static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size.");  struct ApplicationAlbumFileEntry { -    ApplicationAlbumEntry entry; -    AlbumFileDateTime datetime; -    u64 unknown; +    ApplicationAlbumEntry entry{}; +    AlbumFileDateTime datetime{}; +    u64 unknown{};  }; +static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30, +              "ApplicationAlbumFileEntry has incorrect size.");  /// Registers all Capture services with the specified service manager.  void InstallInterfaces(SM::ServiceManager& sm); diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp index 88a3fdc05..a0a3b2ae3 100644 --- a/src/core/hle/service/caps/caps_a.cpp +++ b/src/core/hle/service/caps/caps_a.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h index 8de832491..cb93aad5b 100644 --- a/src/core/hle/service/caps/caps_a.h +++ b/src/core/hle/service/caps/caps_a.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp index ea6452ffa..ab17a187e 100644 --- a/src/core/hle/service/caps/caps_c.cpp +++ b/src/core/hle/service/caps/caps_c.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h index d07cdb441..a9d028689 100644 --- a/src/core/hle/service/caps/caps_c.h +++ b/src/core/hle/service/caps/caps_c.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp index d01a8a58e..822ee96c8 100644 --- a/src/core/hle/service/caps/caps_sc.cpp +++ b/src/core/hle/service/caps/caps_sc.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h index 9ba372f7a..ac3e929ca 100644 --- a/src/core/hle/service/caps/caps_sc.h +++ b/src/core/hle/service/caps/caps_sc.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp index eaa3a7494..24dc716e7 100644 --- a/src/core/hle/service/caps/caps_ss.cpp +++ b/src/core/hle/service/caps/caps_ss.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h index e258a6925..450686e4f 100644 --- a/src/core/hle/service/caps/caps_ss.h +++ b/src/core/hle/service/caps/caps_ss.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp index e8b0698e8..fffb2ecf9 100644 --- a/src/core/hle/service/caps/caps_su.cpp +++ b/src/core/hle/service/caps/caps_su.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h index c494d7c84..62c9603a9 100644 --- a/src/core/hle/service/caps/caps_su.h +++ b/src/core/hle/service/caps/caps_su.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp index 78bab6ed8..f36d8de2d 100644 --- a/src/core/hle/service/caps/caps_u.cpp +++ b/src/core/hle/service/caps/caps_u.cpp @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
@@ -58,19 +58,25 @@ void CAPS_U::GetAlbumContentsFileListForApplication(Kernel::HLERequestContext& c      // u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total      // output entries (which is copied to a s32 by official SW).      IPC::RequestParser rp{ctx}; -    [[maybe_unused]] const auto application_album_file_entries = rp.PopRaw<std::array<u8, 0x30>>(); -    const auto pid = rp.Pop<s32>(); -    const auto content_type = rp.PopRaw<ContentType>(); -    [[maybe_unused]] const auto start_datetime = rp.PopRaw<AlbumFileDateTime>(); -    [[maybe_unused]] const auto end_datetime = rp.PopRaw<AlbumFileDateTime>(); -    const auto applet_resource_user_id = rp.Pop<u64>(); +    const auto pid{rp.Pop<s32>()}; +    const auto content_type{rp.PopEnum<ContentType>()}; +    const auto start_posix_time{rp.Pop<s64>()}; +    const auto end_posix_time{rp.Pop<s64>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    // TODO: Update this when we implement the album. +    // Currently we do not have a method of accessing album entries, set this to 0 for now. +    constexpr s32 total_entries{0}; +      LOG_WARNING(Service_Capture, -                "(STUBBED) called. pid={}, content_type={}, applet_resource_user_id={}", pid, -                content_type, applet_resource_user_id); +                "(STUBBED) called. 
pid={}, content_type={}, start_posix_time={}, " +                "end_posix_time={}, applet_resource_user_id={}, total_entries={}", +                pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id, +                total_entries);      IPC::ResponseBuilder rb{ctx, 3};      rb.Push(RESULT_SUCCESS); -    rb.Push<s32>(0); +    rb.Push(total_entries);  }  } // namespace Service::Capture diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h index e6e0716ff..689364de4 100644 --- a/src/core/hle/service/caps/caps_u.h +++ b/src/core/hle/service/caps/caps_u.h @@ -1,4 +1,4 @@ -// Copyright 2020 yuzu emulator team +// Copyright 2020 yuzu Emulator Project  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index f8e9df4b1..a41c73c48 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -27,8 +27,8 @@ public:              {8, &ETicket::GetTitleKey, "GetTitleKey"},              {9, &ETicket::CountCommonTicket, "CountCommonTicket"},              {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"}, -            {11, &ETicket::ListCommonTicket, "ListCommonTicket"}, -            {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"}, +            {11, &ETicket::ListCommonTicketRightsIds, "ListCommonTicketRightsIds"}, +            {12, &ETicket::ListPersonalizedTicketRightsIds, "ListPersonalizedTicketRightsIds"},              {13, nullptr, "ListMissingPersonalizedTicket"},              {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"},              {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"}, @@ -55,7 +55,46 @@ public:              {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"},              {37, nullptr, "OwnTicket2"},              {38, nullptr, "OwnTicket3"}, +            {501, nullptr, 
"Unknown501"}, +            {502, nullptr, "Unknown502"},              {503, nullptr, "GetTitleKey"}, +            {504, nullptr, "Unknown504"}, +            {508, nullptr, "Unknown508"}, +            {509, nullptr, "Unknown509"}, +            {510, nullptr, "Unknown510"}, +            {511, nullptr, "Unknown511"}, +            {1001, nullptr, "Unknown1001"}, +            {1002, nullptr, "Unknown1001"}, +            {1003, nullptr, "Unknown1003"}, +            {1004, nullptr, "Unknown1004"}, +            {1005, nullptr, "Unknown1005"}, +            {1006, nullptr, "Unknown1006"}, +            {1007, nullptr, "Unknown1007"}, +            {1009, nullptr, "Unknown1009"}, +            {1010, nullptr, "Unknown1010"}, +            {1011, nullptr, "Unknown1011"}, +            {1012, nullptr, "Unknown1012"}, +            {1013, nullptr, "Unknown1013"}, +            {1014, nullptr, "Unknown1014"}, +            {1015, nullptr, "Unknown1015"}, +            {1016, nullptr, "Unknown1016"}, +            {1017, nullptr, "Unknown1017"}, +            {1018, nullptr, "Unknown1018"}, +            {1019, nullptr, "Unknown1019"}, +            {1020, nullptr, "Unknown1020"}, +            {1021, nullptr, "Unknown1021"}, +            {1501, nullptr, "Unknown1501"}, +            {1502, nullptr, "Unknown1502"}, +            {1503, nullptr, "Unknown1503"}, +            {1504, nullptr, "Unknown1504"}, +            {1505, nullptr, "Unknown1505"}, +            {2000, nullptr, "Unknown2000"}, +            {2001, nullptr, "Unknown2001"}, +            {2100, nullptr, "Unknown2100"}, +            {2501, nullptr, "Unknown2501"}, +            {2502, nullptr, "Unknown2502"}, +            {3001, nullptr, "Unknown3001"}, +            {3002, nullptr, "Unknown3002"},          };          // clang-format on          RegisterHandlers(functions); @@ -147,7 +186,7 @@ private:          rb.Push<u32>(count);      } -    void ListCommonTicket(Kernel::HLERequestContext& ctx) { +    void 
ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) {          u32 out_entries;          if (keys.GetCommonTickets().empty())              out_entries = 0; @@ -170,7 +209,7 @@ private:          rb.Push<u32>(out_entries);      } -    void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) { +    void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) {          u32 out_entries;          if (keys.GetPersonalizedTickets().empty())              out_entries = 0; @@ -263,7 +302,7 @@ private:          rb.Push<u64>(write_size);      } -    Core::Crypto::KeyManager keys; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();  };  void InstallInterfaces(SM::ServiceManager& service_manager) { diff --git a/src/core/hle/service/eupld/eupld.cpp b/src/core/hle/service/eupld/eupld.cpp index 2df30acee..0d6d244f4 100644 --- a/src/core/hle/service/eupld/eupld.cpp +++ b/src/core/hle/service/eupld/eupld.cpp @@ -19,6 +19,7 @@ public:              {1, nullptr, "ImportCrt"},              {2, nullptr, "ImportPki"},              {3, nullptr, "SetAutoUpload"}, +            {4, nullptr, "GetAutoUpload"},          };          // clang-format on diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 68f259b70..b7adaffc7 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -25,9 +25,13 @@ public:              {10101, &IFriendService::GetFriendList, "GetFriendList"},              {10102, nullptr, "UpdateFriendInfo"},              {10110, nullptr, "GetFriendProfileImage"}, +            {10120, nullptr, "Unknown10120"}, +            {10121, nullptr, "Unknown10121"},              {10200, nullptr, "SendFriendRequestForApplication"},              {10211, nullptr, "AddFacedFriendRequestForApplication"},              {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, +            {10420, nullptr, "Unknown10420"}, +            {10421, nullptr, 
"Unknown10421"},              {10500, nullptr, "GetProfileList"},              {10600, nullptr, "DeclareOpenOnlinePlaySession"},              {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, @@ -97,6 +101,8 @@ public:              {30900, nullptr, "SendFriendInvitation"},              {30910, nullptr, "ReadFriendInvitation"},              {30911, nullptr, "ReadAllFriendInvitations"}, +            {40100, nullptr, "Unknown40100"}, +            {40400, nullptr, "Unknown40400"},              {49900, nullptr, "DeleteNetworkServiceAccountCache"},          };          // clang-format on diff --git a/src/core/hle/service/grc/grc.cpp b/src/core/hle/service/grc/grc.cpp index 24910ac6c..401e0b208 100644 --- a/src/core/hle/service/grc/grc.cpp +++ b/src/core/hle/service/grc/grc.cpp @@ -17,6 +17,9 @@ public:          static const FunctionInfo functions[] = {              {1, nullptr, "OpenContinuousRecorder"},              {2, nullptr, "OpenGameMovieTrimmer"}, +            {3, nullptr, "OpenOffscreenRecorder"}, +            {101, nullptr, "CreateMovieMaker"}, +            {9903, nullptr, "SetOffscreenRecordingMarker"}          };          // clang-format on diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp index 1f2131ec8..cb35919e9 100644 --- a/src/core/hle/service/hid/controllers/debug_pad.cpp +++ b/src/core/hle/service/hid/controllers/debug_pad.cpp @@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {}  void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                     std::size_t size) { -    shared_memory.header.timestamp = core_timing.GetTicks(); +    shared_memory.header.timestamp = core_timing.GetCPUTicks();      shared_memory.header.total_entry_count = 17;      if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/gesture.cpp 
b/src/core/hle/service/hid/controllers/gesture.cpp index 6e990dd00..b7b7bfeae 100644 --- a/src/core/hle/service/hid/controllers/gesture.cpp +++ b/src/core/hle/service/hid/controllers/gesture.cpp @@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {}  void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                    std::size_t size) { -    shared_memory.header.timestamp = core_timing.GetTicks(); +    shared_memory.header.timestamp = core_timing.GetCPUTicks();      shared_memory.header.total_entry_count = 17;      if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp index 9a8d354ba..feae89525 100644 --- a/src/core/hle/service/hid/controllers/keyboard.cpp +++ b/src/core/hle/service/hid/controllers/keyboard.cpp @@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {}  void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                     std::size_t size) { -    shared_memory.header.timestamp = core_timing.GetTicks(); +    shared_memory.header.timestamp = core_timing.GetCPUTicks();      shared_memory.header.total_entry_count = 17;      if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp index 93d88ea50..ac40989c5 100644 --- a/src/core/hle/service/hid/controllers/mouse.cpp +++ b/src/core/hle/service/hid/controllers/mouse.cpp @@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {}  void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                  std::size_t size) { -    shared_memory.header.timestamp = core_timing.GetTicks(); +    shared_memory.header.timestamp = core_timing.GetCPUTicks();      shared_memory.header.total_entry_count = 17;      if (!IsControllerActivated()) { diff --git 
a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index c55d900e2..ef67ad690 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*              const auto& last_entry =                  main_controller->npad[main_controller->common.last_entry_index]; -            main_controller->common.timestamp = core_timing.GetTicks(); +            main_controller->common.timestamp = core_timing.GetCPUTicks();              main_controller->common.last_entry_index =                  (main_controller->common.last_entry_index + 1) % 17; @@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {      connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;  } +void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) { +    gyroscope_zero_drift_mode = drift_mode; +} + +Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const { +    return gyroscope_zero_drift_mode; +} +  void Controller_NPad::StartLRAssignmentMode() {      // Nothing internally is used for lr assignment mode. 
Since we have the ability to set the      // controller types from boot, it doesn't really matter about showing a selection screen diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 931f03430..5d4c58a43 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h @@ -58,6 +58,12 @@ public:      };      static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size"); +    enum class GyroscopeZeroDriftMode : u32 { +        Loose = 0, +        Standard = 1, +        Tight = 2, +    }; +      enum class NpadHoldType : u64 {          Vertical = 0,          Horizontal = 1, @@ -117,6 +123,8 @@ public:      void ConnectNPad(u32 npad_id);      void DisconnectNPad(u32 npad_id); +    void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode); +    GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;      LedPattern GetLedPattern(u32 npad_id);      void SetVibrationEnabled(bool can_vibrate);      bool IsVibrationEnabled() const; @@ -324,8 +332,8 @@ private:      std::array<Kernel::EventPair, 10> styleset_changed_events;      Vibration last_processed_vibration{};      std::array<ControllerHolder, 10> connected_controllers{}; +    GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};      bool can_controllers_vibrate{true}; -      std::array<ControllerPad, 10> npad_pad_states{};      bool is_in_lr_assignment_mode{false};      Core::System& system; diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp index 9e527d176..e7483bfa2 100644 --- a/src/core/hle/service/hid/controllers/stubbed.cpp +++ b/src/core/hle/service/hid/controllers/stubbed.cpp @@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u      }      CommonHeader header{}; -    header.timestamp = core_timing.GetTicks(); +    header.timestamp = 
core_timing.GetCPUTicks();      header.total_entry_count = 17;      header.entry_count = 0;      header.last_entry_index = 0; diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp index 1c6e55566..e326f8f5c 100644 --- a/src/core/hle/service/hid/controllers/touchscreen.cpp +++ b/src/core/hle/service/hid/controllers/touchscreen.cpp @@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {}  void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                        std::size_t size) { -    shared_memory.header.timestamp = core_timing.GetTicks(); +    shared_memory.header.timestamp = core_timing.GetCPUTicks();      shared_memory.header.total_entry_count = 17;      if (!IsControllerActivated()) { @@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin          touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;          touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;          touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle; -        const u64 tick = core_timing.GetTicks(); +        const u64 tick = core_timing.GetCPUTicks();          touch_entry.delta_time = tick - last_touch;          last_touch = tick;          touch_entry.finger = Settings::values.touchscreen.finger; diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp index 27511b27b..2503ef241 100644 --- a/src/core/hle/service/hid/controllers/xpad.cpp +++ b/src/core/hle/service/hid/controllers/xpad.cpp @@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {}  void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,                                 std::size_t size) {      for (auto& xpad_entry : shared_memory.shared_memory_entries) { -        xpad_entry.header.timestamp = core_timing.GetTicks(); +        
xpad_entry.header.timestamp = core_timing.GetCPUTicks();          xpad_entry.header.total_entry_count = 17;          if (!IsControllerActivated()) { diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 72a050de2..e9020e0dc 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -39,11 +39,9 @@ namespace Service::HID {  // Updating period for each HID device.  // TODO(ogniK): Find actual polling rate of hid -constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66); -[[maybe_unused]] constexpr s64 accelerometer_update_ticks = -    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); -[[maybe_unused]] constexpr s64 gyroscope_update_ticks = -    static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100); +constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66); +[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100); +[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100);  constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;  IAppletResource::IAppletResource(Core::System& system) @@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system)      // Register update callbacks      pad_update_event = -        Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) { -            UpdateControllers(userdata, cycles_late); +        Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) { +            UpdateControllers(userdata, ns_late);          });      // TODO(shinyquagsire23): Other update callbacks? (accel, gyro?) 
@@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {      rb.PushCopyObjects(shared_mem);  } -void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) { +void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) {      auto& core_timing = system.CoreTiming();      const bool should_reload = Settings::values.is_device_reload_pending.exchange(false); @@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {          controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);      } -    core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event); +    core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event);  }  class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> { @@ -185,8 +183,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {          {77, nullptr, "GetAccelerometerPlayMode"},          {78, nullptr, "ResetAccelerometerPlayMode"},          {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"}, -        {80, nullptr, "GetGyroscopeZeroDriftMode"}, -        {81, nullptr, "ResetGyroscopeZeroDriftMode"}, +        {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"}, +        {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},          {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},          {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},          {91, &Hid::ActivateGesture, "ActivateGesture"}, @@ -230,15 +228,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {          {211, nullptr, "IsVibrationDeviceMounted"},          {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},          {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, -        {302, nullptr, "StopConsoleSixAxisSensor"}, -        {303, nullptr, 
"ActivateSevenSixAxisSensor"}, -        {304, nullptr, "StartSevenSixAxisSensor"}, +        {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"}, +        {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"}, +        {304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"},          {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},          {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"}, -        {307, nullptr, "FinalizeSevenSixAxisSensor"}, +        {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"},          {308, nullptr, "SetSevenSixAxisSensorFusionStrength"},          {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, -        {310, nullptr, "ResetSevenSixAxisSensorTimestamp"}, +        {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"},          {400, nullptr, "IsUsbFullKeyControllerEnabled"},          {401, nullptr, "EnableUsbFullKeyController"},          {402, nullptr, "IsUsbFullKeyControllerConnected"}, @@ -374,6 +372,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) {      rb.Push(RESULT_SUCCESS);  } +void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto flags{rp.Pop<u32>()}; +    LOG_WARNING(Service_HID, "(STUBBED) called. 
flags={}", flags); + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} +  void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx};      const auto unknown{rp.Pop<u32>()}; @@ -413,15 +420,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {      rb.Push(RESULT_SUCCESS);  } +void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto handle{rp.Pop<u32>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, +                applet_resource_user_id); + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} +  void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx};      const auto handle{rp.Pop<u32>()};      const auto drift_mode{rp.Pop<u32>()};      const auto applet_resource_user_id{rp.Pop<u64>()}; -    LOG_WARNING(Service_HID, -                "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, -                drift_mode, applet_resource_user_id); +    applet_resource->GetController<Controller_NPad>(HidController::NPad) +        .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode}); + +    LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, +              drift_mode, applet_resource_user_id); + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} + +void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto handle{rp.Pop<u32>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, +              applet_resource_user_id); + +    IPC::ResponseBuilder rb{ctx, 3}; +    rb.Push(RESULT_SUCCESS); +    rb.Push<u32>( +        
static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad) +                             .GetGyroscopeZeroDriftMode())); +} + +void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto handle{rp.Pop<u32>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    applet_resource->GetController<Controller_NPad>(HidController::NPad) +        .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard); + +    LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle, +              applet_resource_user_id);      IPC::ResponseBuilder rb{ctx, 2};      rb.Push(RESULT_SUCCESS); @@ -832,33 +883,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {      rb.Push(RESULT_SUCCESS);  } -void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { +void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx};      const auto handle{rp.Pop<u32>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; -    LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle); +    LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle, +                applet_resource_user_id);      IPC::ResponseBuilder rb{ctx, 2};      rb.Push(RESULT_SUCCESS);  } -void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { +void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx};      const auto applet_resource_user_id{rp.Pop<u64>()}; -    const auto unknown{rp.Pop<u32>()}; -    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}", -                applet_resource_user_id, unknown); +    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", +                applet_resource_user_id);      IPC::ResponseBuilder rb{ctx, 2};      rb.Push(RESULT_SUCCESS);  } -void 
Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { +void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx}; -    const auto unknown{rp.Pop<u32>()}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; -    LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown); +    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", +                applet_resource_user_id);      IPC::ResponseBuilder rb{ctx, 2};      rb.Push(RESULT_SUCCESS); @@ -882,10 +935,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {      rb.Push(RESULT_SUCCESS);  } -void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { +void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {      IPC::RequestParser rp{ctx}; -    const auto flags{rp.Pop<u32>()}; -    LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", +                applet_resource_user_id); + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} + +void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; + +    LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}", +                applet_resource_user_id); + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +} + +void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto applet_resource_user_id{rp.Pop<u64>()}; +    const auto is_palma_all_connectable{rp.Pop<bool>()}; + +    LOG_WARNING(Service_HID, +                "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}", +                applet_resource_user_id, is_palma_all_connectable); + +    IPC::ResponseBuilder rb{ctx, 2}; +   
 rb.Push(RESULT_SUCCESS); +} + +void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { +    IPC::RequestParser rp{ctx}; +    const auto palma_boost_mode{rp.Pop<bool>()}; + +    LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode);      IPC::ResponseBuilder rb{ctx, 2};      rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index d481a75f8..6fb048360 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -91,10 +91,14 @@ private:      void ActivateTouchScreen(Kernel::HLERequestContext& ctx);      void ActivateMouse(Kernel::HLERequestContext& ctx);      void ActivateKeyboard(Kernel::HLERequestContext& ctx); +    void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);      void ActivateGesture(Kernel::HLERequestContext& ctx);      void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);      void StartSixAxisSensor(Kernel::HLERequestContext& ctx); +    void StopSixAxisSensor(Kernel::HLERequestContext& ctx);      void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); +    void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); +    void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);      void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);      void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);      void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); @@ -126,12 +130,15 @@ private:      void IsVibrationPermitted(Kernel::HLERequestContext& ctx);      void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);      void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); -    void StopSixAxisSensor(Kernel::HLERequestContext& ctx); -    void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); -    void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); +    void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); +    void 
ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx); +    void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx);      void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);      void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); -    void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); +    void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); +    void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx); +    void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); +    void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);      std::shared_ptr<IAppletResource> applet_resource;      Core::System& system; diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp index 36ed6f7da..e82fd031b 100644 --- a/src/core/hle/service/hid/irs.cpp +++ b/src/core/hle/service/hid/irs.cpp @@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {      IPC::ResponseBuilder rb{ctx, 5};      rb.Push(RESULT_SUCCESS); -    rb.PushRaw<u64>(system.CoreTiming().GetTicks()); +    rb.PushRaw<u64>(system.CoreTiming().GetCPUTicks());      rb.PushRaw<u32>(0);  } diff --git a/src/core/hle/service/lbl/lbl.cpp b/src/core/hle/service/lbl/lbl.cpp index e8f9f2d29..17350b403 100644 --- a/src/core/hle/service/lbl/lbl.cpp +++ b/src/core/hle/service/lbl/lbl.cpp @@ -47,6 +47,7 @@ public:              {26, &LBL::EnableVrMode, "EnableVrMode"},              {27, &LBL::DisableVrMode, "DisableVrMode"},              {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"}, +            {29, nullptr, "IsAutoBrightnessControlSupported"},          };          // clang-format on diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index 92adde6d4..49972cd69 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -69,6 +69,7 @@ public:              {101, nullptr, "GetNetworkInfoLatestUpdate"},              {102, nullptr, "Scan"},       
       {103, nullptr, "ScanPrivate"}, +            {104, nullptr, "SetWirelessControllerRestriction"},              {200, nullptr, "OpenAccessPoint"},              {201, nullptr, "CloseAccessPoint"},              {202, nullptr, "CreateNetwork"}, diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 6ad3be1b3..64a526b9e 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -39,42 +39,61 @@ constexpr ResultCode ERROR_NOT_INITIALIZED{ErrorModule::Loader, 87};  constexpr std::size_t MAXIMUM_LOADED_RO{0x40};  constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200}; +constexpr std::size_t TEXT_INDEX{0}; +constexpr std::size_t RO_INDEX{1}; +constexpr std::size_t DATA_INDEX{2}; + +struct NRRCertification { +    u64_le application_id_mask; +    u64_le application_id_pattern; +    INSERT_PADDING_BYTES(0x10); +    std::array<u8, 0x100> public_key; // Also known as modulus +    std::array<u8, 0x100> signature; +}; +static_assert(sizeof(NRRCertification) == 0x220, "NRRCertification has invalid size."); +  struct NRRHeader {      u32_le magic; -    INSERT_PADDING_BYTES(12); -    u64_le title_id_mask; -    u64_le title_id_pattern; -    INSERT_PADDING_BYTES(16); -    std::array<u8, 0x100> modulus; -    std::array<u8, 0x100> signature_1; -    std::array<u8, 0x100> signature_2; -    u64_le title_id; +    u32_le certification_signature_key_generation; // 9.0.0+ +    INSERT_PADDING_WORDS(2); +    NRRCertification certification; +    std::array<u8, 0x100> signature; +    u64_le application_id;      u32_le size; -    INSERT_PADDING_BYTES(4); +    u8 nrr_kind; // 7.0.0+ +    INSERT_PADDING_BYTES(3);      u32_le hash_offset;      u32_le hash_count; -    INSERT_PADDING_BYTES(8); +    INSERT_PADDING_WORDS(2); +}; +static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has invalid size."); + +struct SegmentHeader { +    u32_le memory_offset; +    u32_le memory_size;  }; -static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has 
incorrect size."); +static_assert(sizeof(SegmentHeader) == 0x8, "SegmentHeader has invalid size.");  struct NROHeader { +    // Switchbrew calls this "Start" (0x10)      INSERT_PADDING_WORDS(1);      u32_le mod_offset;      INSERT_PADDING_WORDS(2); + +    // Switchbrew calls this "Header" (0x70)      u32_le magic;      u32_le version;      u32_le nro_size;      u32_le flags; -    u32_le text_offset; -    u32_le text_size; -    u32_le ro_offset; -    u32_le ro_size; -    u32_le rw_offset; -    u32_le rw_size; +    // .text, .ro, .data +    std::array<SegmentHeader, 3> segment_headers;      u32_le bss_size;      INSERT_PADDING_WORDS(1);      std::array<u8, 0x20> build_id; -    INSERT_PADDING_BYTES(0x20); +    u32_le dso_handle_offset; +    INSERT_PADDING_WORDS(1); +    // .apiInfo, .dynstr, .dynsym +    std::array<SegmentHeader, 3> segment_headers_2;  };  static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size."); @@ -91,6 +110,7 @@ struct NROInfo {      std::size_t data_size{};      VAddr src_addr{};  }; +static_assert(sizeof(NROInfo) == 0x60, "NROInfo has invalid size.");  class DebugMonitor final : public ServiceFramework<DebugMonitor> {  public: @@ -226,11 +246,11 @@ public:              return;          } -        if (system.CurrentProcess()->GetTitleID() != header.title_id) { +        if (system.CurrentProcess()->GetTitleID() != header.application_id) {              LOG_ERROR(Service_LDR,                        "Attempting to load NRR with title ID other than current process. 
(actual "                        "{:016X})!", -                      header.title_id); +                      header.application_id);              IPC::ResponseBuilder rb{ctx, 2};              rb.Push(ERROR_INVALID_NRR);              return; @@ -348,10 +368,10 @@ public:      ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr,                         VAddr start) const { -        const VAddr text_start{start + nro_header.text_offset}; -        const VAddr ro_start{start + nro_header.ro_offset}; -        const VAddr data_start{start + nro_header.rw_offset}; -        const VAddr bss_start{data_start + nro_header.rw_size}; +        const VAddr text_start{start + nro_header.segment_headers[TEXT_INDEX].memory_offset}; +        const VAddr ro_start{start + nro_header.segment_headers[RO_INDEX].memory_offset}; +        const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset}; +        const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size};          const VAddr bss_end_addr{              Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)}; @@ -360,9 +380,12 @@ public:              system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size());              system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size());          }}; -        CopyCode(nro_addr + nro_header.text_offset, text_start, nro_header.text_size); -        CopyCode(nro_addr + nro_header.ro_offset, ro_start, nro_header.ro_size); -        CopyCode(nro_addr + nro_header.rw_offset, data_start, nro_header.rw_size); +        CopyCode(nro_addr + nro_header.segment_headers[TEXT_INDEX].memory_offset, text_start, +                 nro_header.segment_headers[TEXT_INDEX].memory_size); +        CopyCode(nro_addr + nro_header.segment_headers[RO_INDEX].memory_offset, ro_start, +                 nro_header.segment_headers[RO_INDEX].memory_size); +        CopyCode(nro_addr 
+ nro_header.segment_headers[DATA_INDEX].memory_offset, data_start, +                 nro_header.segment_headers[DATA_INDEX].memory_size);          CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(              text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute)); @@ -484,9 +507,11 @@ public:          }          // Track the loaded NRO -        nro.insert_or_assign(*map_result, NROInfo{hash, *map_result, nro_size, bss_address, -                                                  bss_size, header.text_size, header.ro_size, -                                                  header.rw_size, nro_address}); +        nro.insert_or_assign(*map_result, +                             NROInfo{hash, *map_result, nro_size, bss_address, bss_size, +                                     header.segment_headers[TEXT_INDEX].memory_size, +                                     header.segment_headers[RO_INDEX].memory_size, +                                     header.segment_headers[DATA_INDEX].memory_size, nro_address});          // Invalidate JIT caches for the newly mapped process code          system.InvalidateCpuInstructionCaches(); @@ -584,11 +609,21 @@ private:      static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) {          return header.magic == Common::MakeMagic('N', 'R', 'O', '0') &&                 header.nro_size == nro_size && header.bss_size == bss_size && -               header.ro_offset == header.text_offset + header.text_size && -               header.rw_offset == header.ro_offset + header.ro_size && -               nro_size == header.rw_offset + header.rw_size && -               Common::Is4KBAligned(header.text_size) && Common::Is4KBAligned(header.ro_size) && -               Common::Is4KBAligned(header.rw_size); + +               header.segment_headers[RO_INDEX].memory_offset == +                   header.segment_headers[TEXT_INDEX].memory_offset + +                       
header.segment_headers[TEXT_INDEX].memory_size && + +               header.segment_headers[DATA_INDEX].memory_offset == +                   header.segment_headers[RO_INDEX].memory_offset + +                       header.segment_headers[RO_INDEX].memory_size && + +               nro_size == header.segment_headers[DATA_INDEX].memory_offset + +                               header.segment_headers[DATA_INDEX].memory_size && + +               Common::Is4KBAligned(header.segment_headers[TEXT_INDEX].memory_size) && +               Common::Is4KBAligned(header.segment_headers[RO_INDEX].memory_size) && +               Common::Is4KBAligned(header.segment_headers[DATA_INDEX].memory_size);      }      Core::System& system;  }; diff --git a/src/core/hle/service/lm/manager.cpp b/src/core/hle/service/lm/manager.cpp index b67081b86..3ee2374e7 100644 --- a/src/core/hle/service/lm/manager.cpp +++ b/src/core/hle/service/lm/manager.cpp @@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) {          return Common::StringFromFixedZeroTerminatedBuffer(              reinterpret_cast<const char*>(data.data()), data.size());      default: -        UNIMPLEMENTED(); +        UNIMPLEMENTED_MSG("Unimplemented field type={}", type); +        return "";      }  } diff --git a/src/core/hle/service/mig/mig.cpp b/src/core/hle/service/mig/mig.cpp index d16367f2c..113a4665c 100644 --- a/src/core/hle/service/mig/mig.cpp +++ b/src/core/hle/service/mig/mig.cpp @@ -20,6 +20,12 @@ public:              {101, nullptr, "ResumeServer"},              {200, nullptr, "CreateClient"},              {201, nullptr, "ResumeClient"}, +            {1001, nullptr, "Unknown1001"}, +            {1010, nullptr, "Unknown1010"}, +            {1100, nullptr, "Unknown1100"}, +            {1101, nullptr, "Unknown1101"}, +            {1200, nullptr, "Unknown1200"}, +            {1201, nullptr, "Unknown1201"}          };          // clang-format on diff --git a/src/core/hle/service/mm/mm_u.cpp 
b/src/core/hle/service/mm/mm_u.cpp index def63dc8a..25c24e537 100644 --- a/src/core/hle/service/mm/mm_u.cpp +++ b/src/core/hle/service/mm/mm_u.cpp @@ -14,14 +14,14 @@ public:      explicit MM_U() : ServiceFramework{"mm:u"} {          // clang-format off          static const FunctionInfo functions[] = { -            {0, &MM_U::Initialize, "Initialize"}, -            {1, &MM_U::Finalize, "Finalize"}, -            {2, &MM_U::SetAndWait, "SetAndWait"}, -            {3, &MM_U::Get, "Get"}, -            {4, &MM_U::InitializeWithId, "InitializeWithId"}, -            {5, &MM_U::FinalizeWithId, "FinalizeWithId"}, -            {6, &MM_U::SetAndWaitWithId, "SetAndWaitWithId"}, -            {7, &MM_U::GetWithId, "GetWithId"}, +            {0, &MM_U::InitializeOld, "InitializeOld"}, +            {1, &MM_U::FinalizeOld, "FinalizeOld"}, +            {2, &MM_U::SetAndWaitOld, "SetAndWaitOld"}, +            {3, &MM_U::GetOld, "GetOld"}, +            {4, &MM_U::Initialize, "Initialize"}, +            {5, &MM_U::Finalize, "Finalize"}, +            {6, &MM_U::SetAndWait, "SetAndWait"}, +            {7, &MM_U::Get, "Get"},          };          // clang-format on @@ -29,21 +29,21 @@ public:      }  private: -    void Initialize(Kernel::HLERequestContext& ctx) { +    void InitializeOld(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 2};          rb.Push(RESULT_SUCCESS);      } -    void Finalize(Kernel::HLERequestContext& ctx) { +    void FinalizeOld(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 2};          rb.Push(RESULT_SUCCESS);      } -    void SetAndWait(Kernel::HLERequestContext& ctx) { +    void SetAndWaitOld(Kernel::HLERequestContext& ctx) {          IPC::RequestParser rp{ctx};          min = rp.Pop<u32>();          max = rp.Pop<u32>(); @@ -54,7 +54,7 @@ private:          rb.Push(RESULT_SUCCESS);      } -    void 
Get(Kernel::HLERequestContext& ctx) { +    void GetOld(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 3}; @@ -62,7 +62,7 @@ private:          rb.Push(current);      } -    void InitializeWithId(Kernel::HLERequestContext& ctx) { +    void Initialize(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 3}; @@ -70,14 +70,14 @@ private:          rb.Push<u32>(id); // Any non zero value      } -    void FinalizeWithId(Kernel::HLERequestContext& ctx) { +    void Finalize(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 2};          rb.Push(RESULT_SUCCESS);      } -    void SetAndWaitWithId(Kernel::HLERequestContext& ctx) { +    void SetAndWait(Kernel::HLERequestContext& ctx) {          IPC::RequestParser rp{ctx};          u32 input_id = rp.Pop<u32>();          min = rp.Pop<u32>(); @@ -90,7 +90,7 @@ private:          rb.Push(RESULT_SUCCESS);      } -    void GetWithId(Kernel::HLERequestContext& ctx) { +    void Get(Kernel::HLERequestContext& ctx) {          LOG_WARNING(Service_MM, "(STUBBED) called");          IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp index ec9aae04a..e38dea1f4 100644 --- a/src/core/hle/service/ncm/ncm.cpp +++ b/src/core/hle/service/ncm/ncm.cpp @@ -28,16 +28,16 @@ public:              {7, nullptr, "ResolveApplicationLegalInformationPath"},              {8, nullptr, "RedirectApplicationLegalInformationPath"},              {9, nullptr, "Refresh"}, -            {10, nullptr, "RedirectProgramPath2"}, -            {11, nullptr, "Refresh2"}, -            {12, nullptr, "DeleteProgramPath"}, -            {13, nullptr, "DeleteApplicationControlPath"}, -            {14, nullptr, "DeleteApplicationHtmlDocumentPath"}, -            {15, nullptr, 
"DeleteApplicationLegalInformationPath"}, -            {16, nullptr, ""}, -            {17, nullptr, ""}, -            {18, nullptr, ""}, -            {19, nullptr, ""}, +            {10, nullptr, "RedirectApplicationProgramPath"}, +            {11, nullptr, "ClearApplicationRedirection"}, +            {12, nullptr, "EraseProgramRedirection"}, +            {13, nullptr, "EraseApplicationControlRedirection"}, +            {14, nullptr, "EraseApplicationHtmlDocumentRedirection"}, +            {15, nullptr, "EraseApplicationLegalInformationRedirection"}, +            {16, nullptr, "ResolveProgramPathForDebug"}, +            {17, nullptr, "RedirectProgramPathForDebug"}, +            {18, nullptr, "RedirectApplicationProgramPathForDebug"}, +            {19, nullptr, "EraseProgramRedirectionForDebug"},          };          // clang-format on diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp index b7b34ce7e..780ea30fe 100644 --- a/src/core/hle/service/nfc/nfc.cpp +++ b/src/core/hle/service/nfc/nfc.cpp @@ -198,9 +198,9 @@ public:          static const FunctionInfo functions[] = {              {0, nullptr, "Initialize"},              {1, nullptr, "Finalize"}, -            {2, nullptr, "GetState"}, -            {3, nullptr, "IsNfcEnabled"}, -            {100, nullptr, "SetNfcEnabled"}, +            {2, nullptr, "GetStateOld"}, +            {3, nullptr, "IsNfcEnabledOld"}, +            {100, nullptr, "SetNfcEnabledOld"},              {400, nullptr, "InitializeSystem"},              {401, nullptr, "FinalizeSystem"},              {402, nullptr, "GetState"}, diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 0d913334e..fba89e7a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -200,8 +200,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o      
IoctlGetGpuTime params{};      std::memcpy(&params, input.data(), input.size()); -    const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks()); -    params.gpu_time = static_cast<u64_le>(ns.count()); +    params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count());      std::memcpy(output.data(), &params, output.size());      return 0;  } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 437bc5dee..2f44d3779 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -9,6 +9,7 @@  #include "common/logging/log.h"  #include "common/microprofile.h"  #include "common/scope_exit.h" +#include "common/thread.h"  #include "core/core.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h" @@ -27,8 +28,35 @@  namespace Service::NVFlinger { -constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); -constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30); +constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60); +constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30); + +void NVFlinger::VSyncThread(NVFlinger& nv_flinger) { +    nv_flinger.SplitVSync(); +} + +void NVFlinger::SplitVSync() { +    system.RegisterHostThread(); +    std::string name = "yuzu:VSyncThread"; +    MicroProfileOnThreadCreate(name.c_str()); +    Common::SetCurrentThreadName(name.c_str()); +    Common::SetCurrentThreadPriority(Common::ThreadPriority::High); +    s64 delay = 0; +    while (is_running) { +        guard->lock(); +        const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count(); +        Compose(); +        const auto ticks = GetNextTicks(); +        const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count(); +        const s64 time_passed = time_end - time_start; +        const s64 next_time = std::max<s64>(0, ticks - time_passed - delay); 
+        guard->unlock(); +        if (next_time > 0) { +            wait_event->WaitFor(std::chrono::nanoseconds{next_time}); +        } +        delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time; +    } +}  NVFlinger::NVFlinger(Core::System& system) : system(system) {      displays.emplace_back(0, "Default", system); @@ -36,22 +64,36 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {      displays.emplace_back(2, "Edid", system);      displays.emplace_back(3, "Internal", system);      displays.emplace_back(4, "Null", system); +    guard = std::make_shared<std::mutex>();      // Schedule the screen composition events      composition_event = -        Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) { +        Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) { +            Lock();              Compose(); -            const auto ticks = -                Settings::values.force_30fps_mode ? 
frame_ticks_30fps : GetNextTicks(); -            this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), +            const auto ticks = GetNextTicks(); +            this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),                                                      composition_event);          }); - -    system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); +    if (system.IsMulticore()) { +        is_running = true; +        wait_event = std::make_unique<Common::Event>(); +        vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this)); +    } else { +        system.CoreTiming().ScheduleEvent(frame_ticks, composition_event); +    }  }  NVFlinger::~NVFlinger() { -    system.CoreTiming().UnscheduleEvent(composition_event, 0); +    if (system.IsMulticore()) { +        is_running = false; +        wait_event->Set(); +        vsync_thread->join(); +        vsync_thread.reset(); +        wait_event.reset(); +    } else { +        system.CoreTiming().UnscheduleEvent(composition_event, 0); +    }  }  void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { @@ -199,10 +241,12 @@ void NVFlinger::Compose() {          auto& gpu = system.GPU();          const auto& multi_fence = buffer->get().multi_fence; +        guard->unlock();          for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {              const auto& fence = multi_fence.fences[fence_id];              gpu.WaitFence(fence.id, fence.value);          } +        guard->lock();          MicroProfileFlip(); @@ -223,7 +267,7 @@ void NVFlinger::Compose() {  s64 NVFlinger::GetNextTicks() const {      constexpr s64 max_hertz = 120LL; -    return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz; +    return (1000000000 * (1LL << swap_interval)) / max_hertz;  }  } // namespace Service::NVFlinger diff --git a/src/core/hle/service/nvflinger/nvflinger.h 
b/src/core/hle/service/nvflinger/nvflinger.h index 57a21f33b..e4959a9af 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -4,15 +4,22 @@  #pragma once +#include <atomic>  #include <memory> +#include <mutex>  #include <optional>  #include <string>  #include <string_view> +#include <thread>  #include <vector>  #include "common/common_types.h"  #include "core/hle/kernel/object.h" +namespace Common { +class Event; +} // namespace Common +  namespace Core::Timing {  class CoreTiming;  struct EventType; @@ -79,6 +86,10 @@ public:      s64 GetNextTicks() const; +    std::unique_lock<std::mutex> Lock() { +        return std::unique_lock{*guard}; +    } +  private:      /// Finds the display identified by the specified ID.      VI::Display* FindDisplay(u64 display_id); @@ -92,6 +103,10 @@ private:      /// Finds the layer identified by the specified ID in the desired display.      const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const; +    static void VSyncThread(NVFlinger& nv_flinger); + +    void SplitVSync(); +      std::shared_ptr<Nvidia::Module> nvdrv;      std::vector<VI::Display> displays; @@ -108,7 +123,13 @@ private:      /// Event that handles screen composition.      
std::shared_ptr<Core::Timing::EventType> composition_event; +    std::shared_ptr<std::mutex> guard; +      Core::System& system; + +    std::unique_ptr<std::thread> vsync_thread; +    std::unique_ptr<Common::Event> wait_event; +    std::atomic<bool> is_running{};  };  } // namespace Service::NVFlinger diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp index 14309c679..67833d9af 100644 --- a/src/core/hle/service/prepo/prepo.cpp +++ b/src/core/hle/service/prepo/prepo.cpp @@ -75,8 +75,13 @@ private:          const auto user_id = rp.PopRaw<u128>();          const auto process_id = rp.PopRaw<u64>();          std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)}; +          if constexpr (Type == Core::Reporter::PlayReportType::Old2) { -            data.emplace_back(ctx.ReadBuffer(1)); +            const auto read_buffer_count = +                ctx.BufferDescriptorX().size() + ctx.BufferDescriptorA().size(); +            if (read_buffer_count > 1) { +                data.emplace_back(ctx.ReadBuffer(1)); +            }          }          LOG_DEBUG( diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index f3b4b286c..e5cfd2101 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -3,6 +3,7 @@  // Refer to the license.txt file included.  
#include <algorithm> +#include <array>  #include <chrono>  #include "common/logging/log.h"  #include "core/hle/ipc_helpers.h" @@ -31,6 +32,44 @@ constexpr std::array<LanguageCode, 17> available_language_codes = {{      LanguageCode::ZH_HANT,  }}; +enum class KeyboardLayout : u64 { +    Japanese = 0, +    EnglishUs = 1, +    EnglishUsInternational = 2, +    EnglishUk = 3, +    French = 4, +    FrenchCa = 5, +    Spanish = 6, +    SpanishLatin = 7, +    German = 8, +    Italian = 9, +    Portuguese = 10, +    Russian = 11, +    Korean = 12, +    ChineseSimplified = 13, +    ChineseTraditional = 14, +}; + +constexpr std::array<std::pair<LanguageCode, KeyboardLayout>, 17> language_to_layout{{ +    {LanguageCode::JA, KeyboardLayout::Japanese}, +    {LanguageCode::EN_US, KeyboardLayout::EnglishUs}, +    {LanguageCode::FR, KeyboardLayout::French}, +    {LanguageCode::DE, KeyboardLayout::German}, +    {LanguageCode::IT, KeyboardLayout::Italian}, +    {LanguageCode::ES, KeyboardLayout::Spanish}, +    {LanguageCode::ZH_CN, KeyboardLayout::ChineseSimplified}, +    {LanguageCode::KO, KeyboardLayout::Korean}, +    {LanguageCode::NL, KeyboardLayout::EnglishUsInternational}, +    {LanguageCode::PT, KeyboardLayout::Portuguese}, +    {LanguageCode::RU, KeyboardLayout::Russian}, +    {LanguageCode::ZH_TW, KeyboardLayout::ChineseTraditional}, +    {LanguageCode::EN_GB, KeyboardLayout::EnglishUk}, +    {LanguageCode::FR_CA, KeyboardLayout::FrenchCa}, +    {LanguageCode::ES_419, KeyboardLayout::SpanishLatin}, +    {LanguageCode::ZH_HANS, KeyboardLayout::ChineseSimplified}, +    {LanguageCode::ZH_HANT, KeyboardLayout::ChineseTraditional}, +}}; +  constexpr std::size_t pre4_0_0_max_entries = 15;  constexpr std::size_t post4_0_0_max_entries = 17; @@ -50,6 +89,25 @@ void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t m      ctx.WriteBuffer(available_language_codes.data(), copy_size);      PushResponseLanguageCode(ctx, copy_amount);  } + +void 
GetKeyCodeMapImpl(Kernel::HLERequestContext& ctx) { +    const auto language_code = available_language_codes[Settings::values.language_index]; +    const auto key_code = +        std::find_if(language_to_layout.cbegin(), language_to_layout.cend(), +                     [=](const auto& element) { return element.first == language_code; }); +    KeyboardLayout layout = KeyboardLayout::EnglishUs; +    if (key_code == language_to_layout.cend()) { +        LOG_ERROR(Service_SET, +                  "Could not find keyboard layout for language index {}, defaulting to English us", +                  Settings::values.language_index); +    } else { +        layout = key_code->second; +    } + +    IPC::ResponseBuilder rb{ctx, 2}; +    rb.Push(RESULT_SUCCESS); +    ctx.WriteBuffer(&layout, sizeof(KeyboardLayout)); +}  } // Anonymous namespace  LanguageCode GetLanguageCodeFromIndex(std::size_t index) { @@ -120,6 +178,16 @@ void SET::GetRegionCode(Kernel::HLERequestContext& ctx) {      rb.Push(Settings::values.region_index);  } +void SET::GetKeyCodeMap(Kernel::HLERequestContext& ctx) { +    LOG_DEBUG(Service_SET, "Called {}", ctx.Description()); +    GetKeyCodeMapImpl(ctx); +} + +void SET::GetKeyCodeMap2(Kernel::HLERequestContext& ctx) { +    LOG_DEBUG(Service_SET, "Called {}", ctx.Description()); +    GetKeyCodeMapImpl(ctx); +} +  SET::SET() : ServiceFramework("set") {      // clang-format off      static const FunctionInfo functions[] = { @@ -130,9 +198,9 @@ SET::SET() : ServiceFramework("set") {          {4, &SET::GetRegionCode, "GetRegionCode"},          {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"},          {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, -        {7, nullptr, "GetKeyCodeMap"}, +        {7, &SET::GetKeyCodeMap, "GetKeyCodeMap"},          {8, &SET::GetQuestFlag, "GetQuestFlag"}, -        {9, nullptr, "GetKeyCodeMap2"}, +        {9, &SET::GetKeyCodeMap2, "GetKeyCodeMap2"},          {10, nullptr, 
"GetFirmwareVersionForDebug"},      };      // clang-format on diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h index 6084b345d..8ac9c169d 100644 --- a/src/core/hle/service/set/set.h +++ b/src/core/hle/service/set/set.h @@ -44,6 +44,8 @@ private:      void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx);      void GetQuestFlag(Kernel::HLERequestContext& ctx);      void GetRegionCode(Kernel::HLERequestContext& ctx); +    void GetKeyCodeMap(Kernel::HLERequestContext& ctx); +    void GetKeyCodeMap2(Kernel::HLERequestContext& ctx);  };  } // namespace Service::Set diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp index 6ada13be4..d872de16c 100644 --- a/src/core/hle/service/sm/sm.cpp +++ b/src/core/hle/service/sm/sm.cpp @@ -142,7 +142,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {      }      // Wake the threads waiting on the ServerPort -    server_port->WakeupAllWaitingThreads(); +    server_port->Signal();      LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());      IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 1575f0b49..59a272f4a 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -11,9 +11,8 @@  namespace Service::Time::Clock {  TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { -    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( -        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), -        Core::Hardware::CNTFREQ)}; +    const TimeSpanType ticks_time_span{ +        TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};      TimeSpanType raw_time_point{setup_value.nanoseconds + 
ticks_time_span.nanoseconds};      if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 44d5bc651..8baaa2a6a 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -11,9 +11,8 @@  namespace Service::Time::Clock {  SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { -    const TimeSpanType ticks_time_span{TimeSpanType::FromTicks( -        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), -        Core::Hardware::CNTFREQ)}; +    const TimeSpanType ticks_time_span{ +        TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};      return {ticks_time_span.ToSeconds(), GetClockSourceId()};  } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 67f1bbcf3..4cf58a61a 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -234,9 +234,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe      const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};      if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { -        const auto ticks{Clock::TimeSpanType::FromTicks( -            Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), -            Core::Hardware::CNTFREQ)}; +        const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), +                                                        Core::Hardware::CNTFREQ)};          const s64 base_time_point{context.offset + current_time_point.time_point -                                    ticks.ToSeconds()};          IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git 
a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index 999ec1e51..e0ae9f874 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,                                              const Common::UUID& clock_source_id,                                              Clock::TimeSpanType current_time_point) {      const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( -        Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()), -        Core::Hardware::CNTFREQ)}; +        system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};      const Clock::SteadyClockContext context{          static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),          clock_source_id}; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 46e14c2a3..157092074 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -511,6 +511,7 @@ private:          LOG_DEBUG(Service_VI, "called. 
id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,                    static_cast<u32>(transaction), flags); +        nv_flinger->Lock();          auto& buffer_queue = nv_flinger->FindBufferQueue(id);          switch (transaction) { @@ -550,6 +551,7 @@ private:                      [=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,                          Kernel::ThreadWakeupReason reason) {                          // Repeat TransactParcel DequeueBuffer when a buffer is available +                        nv_flinger->Lock();                          auto& buffer_queue = nv_flinger->FindBufferQueue(id);                          auto result = buffer_queue.DequeueBuffer(width, height);                          ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer."); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9d87045a0..7def00768 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -8,6 +8,7 @@  #include <utility>  #include "common/assert.h" +#include "common/atomic_ops.h"  #include "common/common_types.h"  #include "common/logging/log.h"  #include "common/page_table.h" @@ -29,15 +30,12 @@ namespace Core::Memory {  struct Memory::Impl {      explicit Impl(Core::System& system_) : system{system_} {} -    void SetCurrentPageTable(Kernel::Process& process) { +    void SetCurrentPageTable(Kernel::Process& process, u32 core_id) {          current_page_table = &process.PageTable().PageTableImpl();          const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); -        system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); -        system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); -        system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); -        system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); +        
system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width);      }      void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { @@ -179,6 +177,22 @@ struct Memory::Impl {          }      } +    bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) { +        return WriteExclusive<u8>(addr, data, expected); +    } + +    bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) { +        return WriteExclusive<u16_le>(addr, data, expected); +    } + +    bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) { +        return WriteExclusive<u32_le>(addr, data, expected); +    } + +    bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) { +        return WriteExclusive<u64_le>(addr, data, expected); +    } +      std::string ReadCString(VAddr vaddr, std::size_t max_length) {          std::string string;          string.reserve(max_length); @@ -682,6 +696,67 @@ struct Memory::Impl {          }      } +    template <typename T> +    bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { +        u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; +        if (page_pointer != nullptr) { +            // NOTE: Avoid adding any extra logic to this fast-path block +            T volatile* pointer = reinterpret_cast<T volatile*>(&page_pointer[vaddr]); +            return Common::AtomicCompareAndSwap(pointer, data, expected); +        } + +        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; +        switch (type) { +        case Common::PageType::Unmapped: +            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, +                      static_cast<u32>(data), vaddr); +            return true; +        case Common::PageType::Memory: +            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); +        
    break; +        case Common::PageType::RasterizerCachedMemory: { +            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; +            system.GPU().InvalidateRegion(vaddr, sizeof(T)); +            T volatile* pointer = reinterpret_cast<T volatile*>(&host_ptr); +            return Common::AtomicCompareAndSwap(pointer, data, expected); +            break; +        } +        default: +            UNREACHABLE(); +        } +        return true; +    } + +    bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { +        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; +        if (page_pointer != nullptr) { +            // NOTE: Avoid adding any extra logic to this fast-path block +            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&page_pointer[vaddr]); +            return Common::AtomicCompareAndSwap(pointer, data, expected); +        } + +        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; +        switch (type) { +        case Common::PageType::Unmapped: +            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, +                      static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr); +            return true; +        case Common::PageType::Memory: +            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); +            break; +        case Common::PageType::RasterizerCachedMemory: { +            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)}; +            system.GPU().InvalidateRegion(vaddr, sizeof(u128)); +            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&host_ptr); +            return Common::AtomicCompareAndSwap(pointer, data, expected); +            break; +        } +        default: +            UNREACHABLE(); +        } +        return true; +    } +      Common::PageTable* current_page_table = nullptr;      
Core::System& system;  }; @@ -689,8 +764,8 @@ struct Memory::Impl {  Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {}  Memory::~Memory() = default; -void Memory::SetCurrentPageTable(Kernel::Process& process) { -    impl->SetCurrentPageTable(process); +void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) { +    impl->SetCurrentPageTable(process, core_id);  }  void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { @@ -764,6 +839,26 @@ void Memory::Write64(VAddr addr, u64 data) {      impl->Write64(addr, data);  } +bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) { +    return impl->WriteExclusive8(addr, data, expected); +} + +bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) { +    return impl->WriteExclusive16(addr, data, expected); +} + +bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) { +    return impl->WriteExclusive32(addr, data, expected); +} + +bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) { +    return impl->WriteExclusive64(addr, data, expected); +} + +bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) { +    return impl->WriteExclusive128(addr, data, expected); +} +  std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {      return impl->ReadCString(vaddr, max_length);  } diff --git a/src/core/memory.h b/src/core/memory.h index 9292f3b0a..4a1cc63f4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -64,7 +64,7 @@ public:       *       * @param process The process to use the page table of.       */ -    void SetCurrentPageTable(Kernel::Process& process); +    void SetCurrentPageTable(Kernel::Process& process, u32 core_id);      /**       * Maps an allocated buffer onto a region of the emulated process address space. 
@@ -245,6 +245,71 @@ public:      void Write64(VAddr addr, u64 data);      /** +     * Writes a 8-bit unsigned integer to the given virtual address in +     * the current process' address space if and only if the address contains +     * the expected value. This operation is atomic. +     * +     * @param addr The virtual address to write the 8-bit unsigned integer to. +     * @param data The 8-bit unsigned integer to write to the given virtual address. +     * @param expected The 8-bit unsigned integer to check against the given virtual address. +     * +     * @post The memory range [addr, sizeof(data)) contains the given data value. +     */ +    bool WriteExclusive8(VAddr addr, u8 data, u8 expected); + +    /** +     * Writes a 16-bit unsigned integer to the given virtual address in +     * the current process' address space if and only if the address contains +     * the expected value. This operation is atomic. +     * +     * @param addr The virtual address to write the 16-bit unsigned integer to. +     * @param data The 16-bit unsigned integer to write to the given virtual address. +     * @param expected The 16-bit unsigned integer to check against the given virtual address. +     * +     * @post The memory range [addr, sizeof(data)) contains the given data value. +     */ +    bool WriteExclusive16(VAddr addr, u16 data, u16 expected); + +    /** +     * Writes a 32-bit unsigned integer to the given virtual address in +     * the current process' address space if and only if the address contains +     * the expected value. This operation is atomic. +     * +     * @param addr The virtual address to write the 32-bit unsigned integer to. +     * @param data The 32-bit unsigned integer to write to the given virtual address. +     * @param expected The 32-bit unsigned integer to check against the given virtual address. +     * +     * @post The memory range [addr, sizeof(data)) contains the given data value. 
+     */ +    bool WriteExclusive32(VAddr addr, u32 data, u32 expected); + +    /** +     * Writes a 64-bit unsigned integer to the given virtual address in +     * the current process' address space if and only if the address contains +     * the expected value. This operation is atomic. +     * +     * @param addr The virtual address to write the 64-bit unsigned integer to. +     * @param data The 64-bit unsigned integer to write to the given virtual address. +     * @param expected The 64-bit unsigned integer to check against the given virtual address. +     * +     * @post The memory range [addr, sizeof(data)) contains the given data value. +     */ +    bool WriteExclusive64(VAddr addr, u64 data, u64 expected); + +    /** +     * Writes a 128-bit unsigned integer to the given virtual address in +     * the current process' address space if and only if the address contains +     * the expected value. This operation is atomic. +     * +     * @param addr The virtual address to write the 128-bit unsigned integer to. +     * @param data The 128-bit unsigned integer to write to the given virtual address. +     * @param expected The 128-bit unsigned integer to check against the given virtual address. +     * +     * @post The memory range [addr, sizeof(data)) contains the given data value. +     */ +    bool WriteExclusive128(VAddr addr, u128 data, u128 expected); + +    /**       * Reads a null-terminated string from the given virtual address.       
* This function will continually read characters until either:       * diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b139e8465..53d27859b 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp @@ -20,7 +20,7 @@  namespace Core::Memory { -constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12); +constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12);  constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;  StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata) @@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() {  void CheatEngine::Initialize() {      event = Core::Timing::CreateEvent(          "CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id), -        [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); +        [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });      core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);      metadata.process_id = system.CurrentProcess()->GetProcessID(); @@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) {  MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70)); -void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) { +void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) {      if (is_pending_reload.exchange(false)) {          vm.LoadProgram(cheats);      } @@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {      vm.Execute(metadata); -    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event); +    core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event);  }  } // namespace Core::Memory diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp index f1ae9d4df..9f3a6b811 100644 --- a/src/core/perf_stats.cpp +++ b/src/core/perf_stats.cpp @@ -119,7 +119,7 @@ double 
PerfStats::GetLastFrameTimeScale() {  }  void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) { -    if (!Settings::values.use_frame_limit) { +    if (!Settings::values.use_frame_limit || Settings::values.use_multi_core) {          return;      } diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 4edff9cd8..56df5e925 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -127,6 +127,13 @@ void LogSettings() {      LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);  } +float Volume() { +    if (values.audio_muted) { +        return 0.0f; +    } +    return values.volume; +} +  bool IsGPULevelExtreme() {      return values.gpu_accuracy == GPUAccuracy::Extreme;  } diff --git a/src/core/settings.h b/src/core/settings.h index 33e1e06cd..a598ccbc1 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -459,6 +459,7 @@ struct Values {      bool use_dev_keys;      // Audio +    bool audio_muted;      std::string sink_id;      bool enable_audio_stretching;      std::string audio_device_id; @@ -490,6 +491,8 @@ struct Values {      std::map<u64, std::vector<std::string>> disabled_addons;  } extern values; +float Volume(); +  bool IsGPULevelExtreme();  bool IsGPULevelHigh(); diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp index b2c6c537e..8b0c50d11 100644 --- a/src/core/tools/freezer.cpp +++ b/src/core/tools/freezer.cpp @@ -14,7 +14,7 @@  namespace Tools {  namespace { -constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60); +constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60);  u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) {      switch (width) { @@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m      : core_timing{core_timing_}, memory{memory_} {      event = Core::Timing::CreateEvent(          "MemoryFreezer::FrameCallback", -        [this](u64 
userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); }); +        [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });      core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event);  } @@ -158,7 +158,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const {      return entries;  } -void Freezer::FrameCallback(u64 userdata, s64 cycles_late) { +void Freezer::FrameCallback(u64 userdata, s64 ns_late) {      if (!IsActive()) {          LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events.");          return; @@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {          MemoryWriteWidth(memory, entry.width, entry.address, entry.value);      } -    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event); +    core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event);  }  void Freezer::FillEntryReads() { diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index c7038b217..47ef30aa9 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,6 +1,7 @@  add_executable(tests      common/bit_field.cpp      common/bit_utils.cpp +    common/fibers.cpp      common/multi_level_queue.cpp      common/param_package.cpp      common/ring_buffer.cpp diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp new file mode 100644 index 000000000..4fd92428f --- /dev/null +++ b/src/tests/common/fibers.cpp @@ -0,0 +1,358 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <atomic> +#include <cstdlib> +#include <functional> +#include <memory> +#include <thread> +#include <unordered_map> +#include <vector> + +#include <catch2/catch.hpp> +#include <math.h> +#include "common/common_types.h" +#include "common/fiber.h" +#include "common/spin_lock.h" + +namespace Common { + +class TestControl1 { +public: +    TestControl1() = default; + +    void DoWork(); + +    void ExecuteThread(u32 id); + +    std::unordered_map<std::thread::id, u32> ids; +    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; +    std::vector<std::shared_ptr<Common::Fiber>> work_fibers; +    std::vector<u32> items; +    std::vector<u32> results; +}; + +static void WorkControl1(void* control) { +    auto* test_control = static_cast<TestControl1*>(control); +    test_control->DoWork(); +} + +void TestControl1::DoWork() { +    std::thread::id this_id = std::this_thread::get_id(); +    u32 id = ids[this_id]; +    u32 value = items[id]; +    for (u32 i = 0; i < id; i++) { +        value++; +    } +    results[id] = value; +    Fiber::YieldTo(work_fibers[id], thread_fibers[id]); +} + +void TestControl1::ExecuteThread(u32 id) { +    std::thread::id this_id = std::this_thread::get_id(); +    ids[this_id] = id; +    auto thread_fiber = Fiber::ThreadToFiber(); +    thread_fibers[id] = thread_fiber; +    work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this); +    items[id] = rand() % 256; +    Fiber::YieldTo(thread_fibers[id], work_fibers[id]); +    thread_fibers[id]->Exit(); +} + +static void ThreadStart1(u32 id, TestControl1& test_control) { +    test_control.ExecuteThread(id); +} + +/** This test checks for fiber setup configuration and validates that fibers are + *  doing all the work required. 
+ */ +TEST_CASE("Fibers::Setup", "[common]") { +    constexpr std::size_t num_threads = 7; +    TestControl1 test_control{}; +    test_control.thread_fibers.resize(num_threads); +    test_control.work_fibers.resize(num_threads); +    test_control.items.resize(num_threads, 0); +    test_control.results.resize(num_threads, 0); +    std::vector<std::thread> threads; +    for (u32 i = 0; i < num_threads; i++) { +        threads.emplace_back(ThreadStart1, i, std::ref(test_control)); +    } +    for (u32 i = 0; i < num_threads; i++) { +        threads[i].join(); +    } +    for (u32 i = 0; i < num_threads; i++) { +        REQUIRE(test_control.items[i] + i == test_control.results[i]); +    } +} + +class TestControl2 { +public: +    TestControl2() = default; + +    void DoWork1() { +        trap2 = false; +        while (trap.load()) +            ; +        for (u32 i = 0; i < 12000; i++) { +            value1 += i; +        } +        Fiber::YieldTo(fiber1, fiber3); +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        assert1 = id == 1; +        value2 += 5000; +        Fiber::YieldTo(fiber1, thread_fibers[id]); +    } + +    void DoWork2() { +        while (trap2.load()) +            ; +        value2 = 2000; +        trap = false; +        Fiber::YieldTo(fiber2, fiber1); +        assert3 = false; +    } + +    void DoWork3() { +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        assert2 = id == 0; +        value1 += 1000; +        Fiber::YieldTo(fiber3, thread_fibers[id]); +    } + +    void ExecuteThread(u32 id); + +    void CallFiber1() { +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        Fiber::YieldTo(thread_fibers[id], fiber1); +    } + +    void CallFiber2() { +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        Fiber::YieldTo(thread_fibers[id], fiber2); +    } 
+ +    void Exit(); + +    bool assert1{}; +    bool assert2{}; +    bool assert3{true}; +    u32 value1{}; +    u32 value2{}; +    std::atomic<bool> trap{true}; +    std::atomic<bool> trap2{true}; +    std::unordered_map<std::thread::id, u32> ids; +    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; +    std::shared_ptr<Common::Fiber> fiber1; +    std::shared_ptr<Common::Fiber> fiber2; +    std::shared_ptr<Common::Fiber> fiber3; +}; + +static void WorkControl2_1(void* control) { +    auto* test_control = static_cast<TestControl2*>(control); +    test_control->DoWork1(); +} + +static void WorkControl2_2(void* control) { +    auto* test_control = static_cast<TestControl2*>(control); +    test_control->DoWork2(); +} + +static void WorkControl2_3(void* control) { +    auto* test_control = static_cast<TestControl2*>(control); +    test_control->DoWork3(); +} + +void TestControl2::ExecuteThread(u32 id) { +    std::thread::id this_id = std::this_thread::get_id(); +    ids[this_id] = id; +    auto thread_fiber = Fiber::ThreadToFiber(); +    thread_fibers[id] = thread_fiber; +} + +void TestControl2::Exit() { +    std::thread::id this_id = std::this_thread::get_id(); +    u32 id = ids[this_id]; +    thread_fibers[id]->Exit(); +} + +static void ThreadStart2_1(u32 id, TestControl2& test_control) { +    test_control.ExecuteThread(id); +    test_control.CallFiber1(); +    test_control.Exit(); +} + +static void ThreadStart2_2(u32 id, TestControl2& test_control) { +    test_control.ExecuteThread(id); +    test_control.CallFiber2(); +    test_control.Exit(); +} + +/** This test checks for fiber thread exchange configuration and validates that fibers are + *  that a fiber has been succesfully transfered from one thread to another and that the TLS + *  region of the thread is kept while changing fibers. 
+ */ +TEST_CASE("Fibers::InterExchange", "[common]") { +    TestControl2 test_control{}; +    test_control.thread_fibers.resize(2); +    test_control.fiber1 = +        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control); +    test_control.fiber2 = +        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control); +    test_control.fiber3 = +        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control); +    std::thread thread1(ThreadStart2_1, 0, std::ref(test_control)); +    std::thread thread2(ThreadStart2_2, 1, std::ref(test_control)); +    thread1.join(); +    thread2.join(); +    REQUIRE(test_control.assert1); +    REQUIRE(test_control.assert2); +    REQUIRE(test_control.assert3); +    REQUIRE(test_control.value2 == 7000); +    u32 cal_value = 0; +    for (u32 i = 0; i < 12000; i++) { +        cal_value += i; +    } +    cal_value += 1000; +    REQUIRE(test_control.value1 == cal_value); +} + +class TestControl3 { +public: +    TestControl3() = default; + +    void DoWork1() { +        value1 += 1; +        Fiber::YieldTo(fiber1, fiber2); +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        value3 += 1; +        Fiber::YieldTo(fiber1, thread_fibers[id]); +    } + +    void DoWork2() { +        value2 += 1; +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        Fiber::YieldTo(fiber2, thread_fibers[id]); +    } + +    void ExecuteThread(u32 id); + +    void CallFiber1() { +        std::thread::id this_id = std::this_thread::get_id(); +        u32 id = ids[this_id]; +        Fiber::YieldTo(thread_fibers[id], fiber1); +    } + +    void Exit(); + +    u32 value1{}; +    u32 value2{}; +    u32 value3{}; +    std::unordered_map<std::thread::id, u32> ids; +    std::vector<std::shared_ptr<Common::Fiber>> thread_fibers; +    std::shared_ptr<Common::Fiber> fiber1; +    
std::shared_ptr<Common::Fiber> fiber2; +}; + +static void WorkControl3_1(void* control) { +    auto* test_control = static_cast<TestControl3*>(control); +    test_control->DoWork1(); +} + +static void WorkControl3_2(void* control) { +    auto* test_control = static_cast<TestControl3*>(control); +    test_control->DoWork2(); +} + +void TestControl3::ExecuteThread(u32 id) { +    std::thread::id this_id = std::this_thread::get_id(); +    ids[this_id] = id; +    auto thread_fiber = Fiber::ThreadToFiber(); +    thread_fibers[id] = thread_fiber; +} + +void TestControl3::Exit() { +    std::thread::id this_id = std::this_thread::get_id(); +    u32 id = ids[this_id]; +    thread_fibers[id]->Exit(); +} + +static void ThreadStart3(u32 id, TestControl3& test_control) { +    test_control.ExecuteThread(id); +    test_control.CallFiber1(); +    test_control.Exit(); +} + +/** This test checks for one two threads racing for starting the same fiber. + *  It checks execution occured in an ordered manner and by no time there were + *  two contexts at the same time. 
+ */ +TEST_CASE("Fibers::StartRace", "[common]") { +    TestControl3 test_control{}; +    test_control.thread_fibers.resize(2); +    test_control.fiber1 = +        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control); +    test_control.fiber2 = +        std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control); +    std::thread thread1(ThreadStart3, 0, std::ref(test_control)); +    std::thread thread2(ThreadStart3, 1, std::ref(test_control)); +    thread1.join(); +    thread2.join(); +    REQUIRE(test_control.value1 == 1); +    REQUIRE(test_control.value2 == 1); +    REQUIRE(test_control.value3 == 1); +} + +class TestControl4; + +static void WorkControl4(void* control); + +class TestControl4 { +public: +    TestControl4() { +        fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this); +        goal_reached = false; +        rewinded = false; +    } + +    void Execute() { +        thread_fiber = Fiber::ThreadToFiber(); +        Fiber::YieldTo(thread_fiber, fiber1); +        thread_fiber->Exit(); +    } + +    void DoWork() { +        fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this); +        if (rewinded) { +            goal_reached = true; +            Fiber::YieldTo(fiber1, thread_fiber); +        } +        rewinded = true; +        fiber1->Rewind(); +    } + +    std::shared_ptr<Common::Fiber> fiber1; +    std::shared_ptr<Common::Fiber> thread_fiber; +    bool goal_reached; +    bool rewinded; +}; + +static void WorkControl4(void* control) { +    auto* test_control = static_cast<TestControl4*>(control); +    test_control->DoWork(); +} + +TEST_CASE("Fibers::Rewind", "[common]") { +    TestControl4 test_control{}; +    test_control.Execute(); +    REQUIRE(test_control.goal_reached); +    REQUIRE(test_control.rewinded); +} + +} // namespace Common diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp index ff2d11cc8..e66db1940 100644 --- 
a/src/tests/core/core_timing.cpp +++ b/src/tests/core/core_timing.cpp @@ -18,29 +18,26 @@ namespace {  // Numbers are chosen randomly to make sure the correct one is given.  constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};  constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals +constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}}; +std::array<s64, 5> delays{};  std::bitset<CB_IDS.size()> callbacks_ran_flags;  u64 expected_callback = 0; -s64 lateness = 0;  template <unsigned int IDX> -void CallbackTemplate(u64 userdata, s64 cycles_late) { +void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {      static_assert(IDX < CB_IDS.size(), "IDX out of range");      callbacks_ran_flags.set(IDX);      REQUIRE(CB_IDS[IDX] == userdata); -    REQUIRE(CB_IDS[IDX] == expected_callback); -    REQUIRE(lateness == cycles_late); -} - -u64 callbacks_done = 0; - -void EmptyCallback(u64 userdata, s64 cycles_late) { -    ++callbacks_done; +    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]); +    delays[IDX] = nanoseconds_late; +    ++expected_callback;  }  struct ScopeInit final {      ScopeInit() { -        core_timing.Initialize(); +        core_timing.SetMulticore(true); +        core_timing.Initialize([]() {});      }      ~ScopeInit() {          core_timing.Shutdown(); @@ -49,110 +46,101 @@ struct ScopeInit final {      Core::Timing::CoreTiming core_timing;  }; -void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0, -                     int expected_lateness = 0, int cpu_downcount = 0) { -    callbacks_ran_flags = 0; -    expected_callback = CB_IDS[idx]; -    lateness = expected_lateness; - -    // Pretend we executed X cycles of instructions. 
-    core_timing.SwitchContext(context); -    core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount); -    core_timing.Advance(); -    core_timing.SwitchContext((context + 1) % 4); +#pragma optimize("", off) -    REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags); +u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) { +    u64 start = core_timing.GetGlobalTimeNs().count(); +    u64 placebo = 0; +    for (std::size_t i = 0; i < 1000; i++) { +        placebo += core_timing.GetGlobalTimeNs().count(); +    } +    u64 end = core_timing.GetGlobalTimeNs().count(); +    return (end - start);  } + +#pragma optimize("", on) +  } // Anonymous namespace  TEST_CASE("CoreTiming[BasicOrder]", "[core]") {      ScopeInit guard;      auto& core_timing = guard.core_timing; +    std::vector<std::shared_ptr<Core::Timing::EventType>> events{ +        Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), +        Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), +        Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), +        Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), +        Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), +    }; + +    expected_callback = 0; + +    core_timing.SyncPause(true); + +    u64 one_micro = 1000U; +    for (std::size_t i = 0; i < events.size(); i++) { +        u64 order = calls_order[i]; +        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); +    } +    /// test pause +    REQUIRE(callbacks_ran_flags.none()); -    std::shared_ptr<Core::Timing::EventType> cb_a = -        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); -    std::shared_ptr<Core::Timing::EventType> cb_b = -        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); -    std::shared_ptr<Core::Timing::EventType> cb_c = -        Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>); -    
std::shared_ptr<Core::Timing::EventType> cb_d = -        Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>); -    std::shared_ptr<Core::Timing::EventType> cb_e = -        Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>); - -    // Enter slice 0 -    core_timing.ResetRun(); - -    // D -> B -> C -> A -> E -    core_timing.SwitchContext(0); -    core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]); -    REQUIRE(1000 == core_timing.GetDowncount()); -    core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]); -    REQUIRE(500 == core_timing.GetDowncount()); -    core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]); -    REQUIRE(500 == core_timing.GetDowncount()); -    core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]); -    REQUIRE(100 == core_timing.GetDowncount()); -    core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]); -    REQUIRE(100 == core_timing.GetDowncount()); - -    AdvanceAndCheck(core_timing, 3, 0); -    AdvanceAndCheck(core_timing, 1, 1); -    AdvanceAndCheck(core_timing, 2, 2); -    AdvanceAndCheck(core_timing, 0, 3); -    AdvanceAndCheck(core_timing, 4, 0); -} - -TEST_CASE("CoreTiming[FairSharing]", "[core]") { +    core_timing.Pause(false); // No need to sync -    ScopeInit guard; -    auto& core_timing = guard.core_timing; +    while (core_timing.HasPendingEvents()) +        ; -    std::shared_ptr<Core::Timing::EventType> empty_callback = -        Core::Timing::CreateEvent("empty_callback", EmptyCallback); +    REQUIRE(callbacks_ran_flags.all()); -    callbacks_done = 0; -    u64 MAX_CALLBACKS = 10; -    for (std::size_t i = 0; i < 10; i++) { -        core_timing.ScheduleEvent(i * 3333U, empty_callback, 0); +    for (std::size_t i = 0; i < delays.size(); i++) { +        const double delay = static_cast<double>(delays[i]); +        const double micro = delay / 1000.0f; +        const double mili = micro / 1000.0f; +        printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);      } - -    const s64 advances = MAX_SLICE_LENGTH / 10; -    
core_timing.ResetRun(); -    u64 current_time = core_timing.GetTicks(); -    bool keep_running{}; -    do { -        keep_running = false; -        for (u32 active_core = 0; active_core < 4; ++active_core) { -            core_timing.SwitchContext(active_core); -            if (core_timing.CanCurrentContextRun()) { -                core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount())); -                core_timing.Advance(); -            } -            keep_running |= core_timing.CanCurrentContextRun(); -        } -    } while (keep_running); -    u64 current_time_2 = core_timing.GetTicks(); - -    REQUIRE(MAX_CALLBACKS == callbacks_done); -    REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4);  } -TEST_CASE("Core::Timing[PredictableLateness]", "[core]") { +TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") {      ScopeInit guard;      auto& core_timing = guard.core_timing; +    std::vector<std::shared_ptr<Core::Timing::EventType>> events{ +        Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>), +        Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>), +        Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>), +        Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>), +        Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>), +    }; + +    core_timing.SyncPause(true); +    core_timing.SyncPause(false); + +    expected_callback = 0; + +    u64 start = core_timing.GetGlobalTimeNs().count(); +    u64 one_micro = 1000U; +    for (std::size_t i = 0; i < events.size(); i++) { +        u64 order = calls_order[i]; +        core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]); +    } +    u64 end = core_timing.GetGlobalTimeNs().count(); +    const double scheduling_time = static_cast<double>(end - start); +    const double timer_time = static_cast<double>(TestTimerSpeed(core_timing)); -    std::shared_ptr<Core::Timing::EventType> cb_a = 
-        Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>); -    std::shared_ptr<Core::Timing::EventType> cb_b = -        Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>); +    while (core_timing.HasPendingEvents()) +        ; -    // Enter slice 0 -    core_timing.ResetRun(); +    REQUIRE(callbacks_ran_flags.all()); -    core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]); -    core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]); +    for (std::size_t i = 0; i < delays.size(); i++) { +        const double delay = static_cast<double>(delays[i]); +        const double micro = delay / 1000.0f; +        const double mili = micro / 1000.0f; +        printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili); +    } -    AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10) -    AdvanceAndCheck(core_timing, 1, 1, 50, -50); +    const double micro = scheduling_time / 1000.0f; +    const double mili = micro / 1000.0f; +    printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili); +    printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f, +           timer_time / 1000000.f);  } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 099bb446e..21c46a567 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(video_core STATIC      buffer_cache/buffer_cache.h      buffer_cache/map_interval.cpp      buffer_cache/map_interval.h +    compatible_formats.cpp +    compatible_formats.h      dirty_flags.cpp      dirty_flags.h      dma_pusher.cpp @@ -27,6 +29,8 @@ add_library(video_core STATIC      engines/shader_type.h      macro/macro.cpp      macro/macro.h +    macro/macro_hle.cpp +    macro/macro_hle.h      macro/macro_interpreter.cpp      macro/macro_interpreter.h      macro/macro_jit_x64.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 308d8b55f..c6479af9f 100644 --- 
a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -41,16 +41,20 @@ class BufferCache {      static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;  public: -    using BufferInfo = std::pair<BufferType, u64>; +    struct BufferInfo { +        BufferType handle; +        u64 offset; +        u64 address; +    };      BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,                              bool is_written = false, bool use_fast_cbuf = false) {          std::lock_guard lock{mutex}; -        const auto& memory_manager = system.GPU().MemoryManager(); +        auto& memory_manager = system.GPU().MemoryManager();          const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);          if (!cpu_addr_opt) { -            return {GetEmptyBuffer(size), 0}; +            return GetEmptyBuffer(size);          }          const VAddr cpu_addr = *cpu_addr_opt; @@ -59,7 +63,6 @@ public:          constexpr std::size_t max_stream_size = 0x800;          if (use_fast_cbuf || size < max_stream_size) {              if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { -                auto& memory_manager = system.GPU().MemoryManager();                  const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);                  if (use_fast_cbuf) {                      u8* dest; @@ -89,7 +92,7 @@ public:          Buffer* const block = GetBlock(cpu_addr, size);          MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);          if (!map) { -            return {GetEmptyBuffer(size), 0}; +            return GetEmptyBuffer(size);          }          if (is_written) {              map->MarkAsModified(true, GetModifiedTicks()); @@ -102,7 +105,7 @@ public:              }          } -        return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; +        return BufferInfo{block->Handle(), 
block->Offset(cpu_addr), block->Address()};      }      /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -255,27 +258,17 @@ public:          committed_flushes.pop_front();      } -    virtual BufferType GetEmptyBuffer(std::size_t size) = 0; +    virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;  protected:      explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, -                         std::unique_ptr<StreamBuffer> stream_buffer_) -        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, -          stream_buffer_handle{stream_buffer->Handle()} {} +                         std::unique_ptr<StreamBuffer> stream_buffer) +        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}      ~BufferCache() = default;      virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; -    virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                 const u8* data) = 0; - -    virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                   u8* data) = 0; - -    virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, -                           std::size_t dst_offset, std::size_t size) = 0; -      virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {          return {};      } @@ -329,19 +322,18 @@ protected:      }  private: -    MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, -                            std::size_t size) { +    MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {          const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);          if (overlaps.empty()) {              auto& memory_manager = 
system.GPU().MemoryManager();              const VAddr cpu_addr_end = cpu_addr + size;              if (memory_manager.IsGranularRange(gpu_addr, size)) {                  u8* host_ptr = memory_manager.GetPointer(gpu_addr); -                UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); +                block->Upload(block->Offset(cpu_addr), size, host_ptr);              } else {                  staging_buffer.resize(size);                  memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); -                UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); +                block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());              }              return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));          } @@ -384,8 +376,7 @@ private:          return map;      } -    void UpdateBlock(const Buffer* block, VAddr start, VAddr end, -                     const VectorMapInterval& overlaps) { +    void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {          const IntervalType base_interval{start, end};          IntervalSet interval_set{};          interval_set.add(base_interval); @@ -400,7 +391,7 @@ private:              }              staging_buffer.resize(size);              system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); -            UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); +            block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());          }      } @@ -437,7 +428,7 @@ private:          const std::size_t size = map->end - map->start;          staging_buffer.resize(size); -        DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); +        block->Download(block->Offset(map->start), size, staging_buffer.data());          system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), 
size);          map->MarkAsModified(false, 0);      } @@ -450,7 +441,7 @@ private:          buffer_ptr += size;          buffer_offset += size; -        return {stream_buffer_handle, uploaded_offset}; +        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};      }      void AlignBuffer(std::size_t alignment) { @@ -465,7 +456,7 @@ private:          const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;          const VAddr cpu_addr = buffer->CpuAddr();          std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); -        CopyBlock(*buffer, *new_buffer, 0, 0, old_size); +        new_buffer->CopyFrom(*buffer, 0, 0, old_size);          QueueDestruction(std::move(buffer));          const VAddr cpu_addr_end = cpu_addr + new_size - 1; @@ -487,8 +478,8 @@ private:          const std::size_t new_size = size_1 + size_2;          std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); -        CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); -        CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); +        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); +        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);          QueueDestruction(std::move(first));          QueueDestruction(std::move(second)); diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp new file mode 100644 index 000000000..6c426b035 --- /dev/null +++ b/src/video_core/compatible_formats.cpp @@ -0,0 +1,162 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/compatible_formats.h" +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +namespace { + +// Compatibility table taken from Table 3.X.2 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt + +constexpr std::array VIEW_CLASS_128_BITS = { +    PixelFormat::RGBA32F, +    PixelFormat::RGBA32UI, +}; +// Missing formats: +// PixelFormat::RGBA32I + +constexpr std::array VIEW_CLASS_96_BITS = { +    PixelFormat::RGB32F, +}; +// Missing formats: +// PixelFormat::RGB32UI, +// PixelFormat::RGB32I, + +constexpr std::array VIEW_CLASS_64_BITS = { +    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI, PixelFormat::RG32UI, +    PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S, +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I + +// TODO: How should we handle 48 bits? + +constexpr std::array VIEW_CLASS_32_BITS = { +    PixelFormat::RG16F,        PixelFormat::R11FG11FB10F, PixelFormat::R32F, +    PixelFormat::A2B10G10R10U, PixelFormat::RG16UI,       PixelFormat::R32UI, +    PixelFormat::RG16I,        PixelFormat::R32I,         PixelFormat::ABGR8U, +    PixelFormat::RG16,         PixelFormat::ABGR8S,       PixelFormat::RG16S, +    PixelFormat::RGBA8_SRGB,   PixelFormat::E5B9G9R9F,    PixelFormat::BGRA8, +    PixelFormat::BGRA8_SRGB, +}; +// Missing formats: +// PixelFormat::RGBA8UI +// PixelFormat::RGBA8I +// PixelFormat::RGB10_A2_UI + +// TODO: How should we handle 24 bits? 
+ +constexpr std::array VIEW_CLASS_16_BITS = { +    PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I, +    PixelFormat::RG8U, PixelFormat::R16U,  PixelFormat::RG8S,  PixelFormat::R16S, +}; +// Missing formats: +// PixelFormat::RG8I + +constexpr std::array VIEW_CLASS_8_BITS = { +    PixelFormat::R8UI, +    PixelFormat::R8U, +}; +// Missing formats: +// PixelFormat::R8I +// PixelFormat::R8S + +constexpr std::array VIEW_CLASS_RGTC1_RED = { +    PixelFormat::DXN1, +}; +// Missing formats: +// COMPRESSED_SIGNED_RED_RGTC1 + +constexpr std::array VIEW_CLASS_RGTC2_RG = { +    PixelFormat::DXN2UNORM, +    PixelFormat::DXN2SNORM, +}; + +constexpr std::array VIEW_CLASS_BPTC_UNORM = { +    PixelFormat::BC7U, +    PixelFormat::BC7U_SRGB, +}; + +constexpr std::array VIEW_CLASS_BPTC_FLOAT = { +    PixelFormat::BC6H_SF16, +    PixelFormat::BC6H_UF16, +}; + +// Compatibility table taken from Table 4.X.1 in: +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt + +constexpr std::array COPY_CLASS_128_BITS = { +    PixelFormat::RGBA32UI,   PixelFormat::RGBA32F,   PixelFormat::DXT23, +    PixelFormat::DXT23_SRGB, PixelFormat::DXT45,     PixelFormat::DXT45_SRGB, +    PixelFormat::DXN2SNORM,  PixelFormat::BC7U,      PixelFormat::BC7U_SRGB, +    PixelFormat::BC6H_SF16,  PixelFormat::BC6H_UF16, +}; +// Missing formats: +// PixelFormat::RGBA32I +// COMPRESSED_RG_RGTC2 + +constexpr std::array COPY_CLASS_64_BITS = { +    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI,  PixelFormat::RG32UI, +    PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1, + +}; +// Missing formats: +// PixelFormat::RGBA16I +// PixelFormat::RG32I, +// COMPRESSED_RGB_S3TC_DXT1_EXT +// COMPRESSED_SRGB_S3TC_DXT1_EXT +// COMPRESSED_RGBA_S3TC_DXT1_EXT +// COMPRESSED_SIGNED_RED_RGTC1 + +void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { +    compatiblity[format_a][format_b] = true; + 
   compatiblity[format_b][format_a] = true; +} + +void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { +    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); +} + +template <typename Range> +void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { +    for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { +        for (auto it_b = it_a; it_b != range.end(); ++it_b) { +            Enable(compatibility, *it_a, *it_b); +        } +    } +} + +} // Anonymous namespace + +FormatCompatibility::FormatCompatibility() { +    for (size_t i = 0; i < MaxPixelFormat; ++i) { +        // Identity is allowed +        Enable(view, i, i); +    } + +    EnableRange(view, VIEW_CLASS_128_BITS); +    EnableRange(view, VIEW_CLASS_96_BITS); +    EnableRange(view, VIEW_CLASS_64_BITS); +    EnableRange(view, VIEW_CLASS_32_BITS); +    EnableRange(view, VIEW_CLASS_16_BITS); +    EnableRange(view, VIEW_CLASS_8_BITS); +    EnableRange(view, VIEW_CLASS_RGTC1_RED); +    EnableRange(view, VIEW_CLASS_RGTC2_RG); +    EnableRange(view, VIEW_CLASS_BPTC_UNORM); +    EnableRange(view, VIEW_CLASS_BPTC_FLOAT); + +    copy = view; +    EnableRange(copy, COPY_CLASS_128_BITS); +    EnableRange(copy, COPY_CLASS_64_BITS); +} + +} // namespace VideoCore::Surface diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h new file mode 100644 index 000000000..d1082566d --- /dev/null +++ b/src/video_core/compatible_formats.h @@ -0,0 +1,32 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <bitset> +#include <cstddef> + +#include "video_core/surface.h" + +namespace VideoCore::Surface { + +class FormatCompatibility { +public: +    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>; + +    explicit FormatCompatibility(); + +    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept { +        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; +    } + +    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept { +        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)]; +    } + +private: +    Table view; +    Table copy; +}; + +} // namespace VideoCore::Surface diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ea3c8a963..c01436295 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)          ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());      // Execute the current macro. -    macro_engine->Execute(macro_positions[entry], parameters); +    macro_engine->Execute(*this, macro_positions[entry], parameters);      if (mme_draw.current_mode != MMEDrawMode::Undefined) {          FlushMMEInlineDraw();      } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d5fe25065..ef1618990 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1418,6 +1418,14 @@ public:          return execute_on;      } +    VideoCore::RasterizerInterface& GetRasterizer() { +        return rasterizer; +    } + +    const VideoCore::RasterizerInterface& GetRasterizer() const { +        return rasterizer; +    } +      /// Notify a memory write has happened.      
void OnMemoryWrite() {          dirty.flags |= dirty.on_write_stores; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e7cb87589..d374b73cf 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -661,6 +661,10 @@ union Instruction {      constexpr Instruction(u64 value) : value{value} {}      constexpr Instruction(const Instruction& instr) : value(instr.value) {} +    constexpr bool Bit(u64 offset) const { +        return ((value >> offset) & 1) != 0; +    } +      BitField<0, 8, Register> gpr0;      BitField<8, 8, Register> gpr8;      union { @@ -1874,7 +1878,9 @@ public:          HSETP2_C,          HSETP2_R,          HSETP2_IMM, +        HSET2_C,          HSET2_R, +        HSET2_IMM,          POPC_C,          POPC_R,          POPC_IMM, @@ -2194,7 +2200,9 @@ private:              INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),              INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),              INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), +            INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),              INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), +            INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),              INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),              INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),              INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8eb017f65..482e49711 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
+#include <chrono> +  #include "common/assert.h"  #include "common/microprofile.h"  #include "core/core.h" @@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {      constexpr u64 gpu_ticks_num = 384;      constexpr u64 gpu_ticks_den = 625; -    const u64 cpu_ticks = system.CoreTiming().GetTicks(); -    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); +    u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();      if (Settings::values.use_fast_gpu_time) {          nanoseconds /= 256;      } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a1b4c305c..2c42483bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -284,6 +284,12 @@ public:      /// core timing events.      virtual void Start() = 0; +    /// Obtain the CPU Context +    virtual void ObtainContext() = 0; + +    /// Release the CPU Context +    virtual void ReleaseContext() = 0; +      /// Push GPU command entries to be processed      virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 53305ab43..7b855f63e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa  GPUAsynch::~GPUAsynch() = default;  void GPUAsynch::Start() { -    cpu_context->MakeCurrent();      gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);  } +void GPUAsynch::ObtainContext() { +    cpu_context->MakeCurrent(); +} + +void GPUAsynch::ReleaseContext() { +    cpu_context->DoneCurrent(); +} +  void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {      gpu_thread.SubmitList(std::move(entries));  } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 517658612..15e9f1d38 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,8 @@ public:      ~GPUAsynch() override;      void Start() override; +    void 
ObtainContext() override; +    void ReleaseContext() override;      void PushGPUEntries(Tegra::CommandList&& entries) override;      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;      void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 6f38a672a..aaeb9811d 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase  GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() { +void GPUSynch::Start() {} + +void GPUSynch::ObtainContext() {      context->MakeCurrent();  } +void GPUSynch::ReleaseContext() { +    context->DoneCurrent(); +} +  void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {      dma_pusher->Push(std::move(entries));      dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 4a6e9a01d..762c20aa5 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,8 @@ public:      ~GPUSynch() override;      void Start() override; +    void ObtainContext() override; +    void ReleaseContext() override;      void PushGPUEntries(Tegra::CommandList&& entries) override;      void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;      void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c3bb4fe06..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@  #include "common/assert.h"  #include "common/microprofile.h" +#include "common/thread.h"  #include "core/core.h"  #include "core/frontend/emu_window.h"  #include "core/settings.h" @@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {  static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,                        Core::Frontend::GraphicsContext& context, 
Tegra::DmaPusher& dma_pusher,                        SynchState& state) { -    MicroProfileOnThreadCreate("GpuThread"); +    std::string name = "yuzu:GPU"; +    MicroProfileOnThreadCreate(name.c_str()); +    Common::SetCurrentThreadName(name.c_str()); +    Common::SetCurrentThreadPriority(Common::ThreadPriority::High); +    system.RegisterHostThread();      // Wait for first GPU command before acquiring the window context      while (state.queue.Empty()) diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 89077a2d8..a50e7b4e0 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,32 +2,78 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <optional> +#include <boost/container_hash/hash.hpp>  #include "common/assert.h"  #include "common/logging/log.h"  #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/macro/macro.h" +#include "video_core/macro/macro_hle.h"  #include "video_core/macro/macro_interpreter.h"  #include "video_core/macro/macro_jit_x64.h"  namespace Tegra { +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) +    : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} + +MacroEngine::~MacroEngine() = default; +  void MacroEngine::AddCode(u32 method, u32 data) {      uploaded_macro_code[method].push_back(data);  } -void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { +void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, +                          const std::vector<u32>& parameters) {      auto compiled_macro = macro_cache.find(method);      if (compiled_macro != macro_cache.end()) { -        compiled_macro->second->Execute(parameters, method); +        const auto& cache_info = compiled_macro->second; +        if (cache_info.has_hle_program) { +            cache_info.hle_program->Execute(parameters, method); +        } else { +            
cache_info.lle_program->Execute(parameters, method); +        }      } else {          // Macro not compiled, check if it's uploaded and if so, compile it -        auto macro_code = uploaded_macro_code.find(method); +        std::optional<u32> mid_method = std::nullopt; +        const auto macro_code = uploaded_macro_code.find(method);          if (macro_code == uploaded_macro_code.end()) { -            UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); -            return; +            for (const auto& [method_base, code] : uploaded_macro_code) { +                if (method >= method_base && (method - method_base) < code.size()) { +                    mid_method = method_base; +                    break; +                } +            } +            if (!mid_method.has_value()) { +                UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); +                return; +            } +        } +        auto& cache_info = macro_cache[method]; + +        if (!mid_method.has_value()) { +            cache_info.lle_program = Compile(macro_code->second); +            cache_info.hash = boost::hash_value(macro_code->second); +        } else { +            const auto& macro_cached = uploaded_macro_code[mid_method.value()]; +            const auto rebased_method = method - mid_method.value(); +            auto& code = uploaded_macro_code[method]; +            code.resize(macro_cached.size() - rebased_method); +            std::memcpy(code.data(), macro_cached.data() + rebased_method, +                        code.size() * sizeof(u32)); +            cache_info.hash = boost::hash_value(code); +            cache_info.lle_program = Compile(code); +        } + +        auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); +        if (hle_program.has_value()) { +            cache_info.has_hle_program = true; +            cache_info.hle_program = std::move(hle_program.value()); +            cache_info.hle_program->Execute(parameters, method); +        
} else { +            cache_info.lle_program->Execute(parameters, method);          } -        macro_cache[method] = Compile(macro_code->second); -        macro_cache[method]->Execute(parameters, method);      }  } diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index b76ed891f..4d00b84b0 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h @@ -11,9 +11,11 @@  #include "common/common_types.h"  namespace Tegra { +  namespace Engines {  class Maxwell3D;  } +  namespace Macro {  constexpr std::size_t NUM_MACRO_REGISTERS = 8;  enum class Operation : u32 { @@ -94,6 +96,8 @@ union MethodAddress {  } // namespace Macro +class HLEMacro; +  class CachedMacro {  public:      virtual ~CachedMacro() = default; @@ -107,20 +111,29 @@ public:  class MacroEngine {  public: -    virtual ~MacroEngine() = default; +    explicit MacroEngine(Engines::Maxwell3D& maxwell3d); +    virtual ~MacroEngine();      // Store the uploaded macro code to compile them when they're called.      
void AddCode(u32 method, u32 data);      // Compiles the macro if its not in the cache, and executes the compiled macro -    void Execute(u32 method, const std::vector<u32>& parameters); +    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);  protected:      virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;  private: -    std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; +    struct CacheInfo { +        std::unique_ptr<CachedMacro> lle_program{}; +        std::unique_ptr<CachedMacro> hle_program{}; +        u64 hash{}; +        bool has_hle_program{}; +    }; + +    std::unordered_map<u32, CacheInfo> macro_cache;      std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; +    std::unique_ptr<HLEMacro> hle_macros;  };  std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..410f99018 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/macro/macro_hle.h" +#include "video_core/rasterizer_interface.h" + +namespace Tegra { + +namespace { +// HLE'd functions +static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, +                                 const std::vector<u32>& parameters) { +    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); + +    maxwell3d.regs.draw.topology.Assign( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & +                                                                        ~(0x3ffffff << 26))); +    maxwell3d.regs.vb_base_instance = parameters[5]; +    maxwell3d.mme_draw.instance_count = instance_count; +    maxwell3d.regs.vb_element_base = parameters[3]; +    maxwell3d.regs.index_array.count = parameters[1]; +    maxwell3d.regs.index_array.first = parameters[4]; + +    if (maxwell3d.ShouldExecute()) { +        maxwell3d.GetRasterizer().Draw(true, true); +    } +    maxwell3d.regs.index_array.count = 0; +    maxwell3d.mme_draw.instance_count = 0; +    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, +                                 const std::vector<u32>& parameters) { +    const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + +    maxwell3d.regs.vertex_buffer.first = parameters[3]; +    maxwell3d.regs.vertex_buffer.count = parameters[1]; +    maxwell3d.regs.vb_base_instance = parameters[4]; +    maxwell3d.regs.draw.topology.Assign( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); +    maxwell3d.mme_draw.instance_count = count; + +    if (maxwell3d.ShouldExecute()) { +        maxwell3d.GetRasterizer().Draw(false, true); +    } +    maxwell3d.regs.vertex_buffer.count = 0; +    maxwell3d.mme_draw.instance_count = 0; +    
maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} + +static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, +                                 const std::vector<u32>& parameters) { +    const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); +    const u32 element_base = parameters[4]; +    const u32 base_instance = parameters[5]; +    maxwell3d.regs.index_array.first = parameters[3]; +    maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? +    maxwell3d.regs.index_array.count = parameters[1]; +    maxwell3d.regs.vb_element_base = element_base; +    maxwell3d.regs.vb_base_instance = base_instance; +    maxwell3d.mme_draw.instance_count = instance_count; +    maxwell3d.CallMethodFromMME(0x8e3, 0x640); +    maxwell3d.CallMethodFromMME(0x8e4, element_base); +    maxwell3d.CallMethodFromMME(0x8e5, base_instance); +    maxwell3d.regs.draw.topology.Assign( +        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); +    if (maxwell3d.ShouldExecute()) { +        maxwell3d.GetRasterizer().Draw(true, true); +    } +    maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? 
+    maxwell3d.regs.index_array.count = 0; +    maxwell3d.regs.vb_element_base = 0x0; +    maxwell3d.regs.vb_base_instance = 0x0; +    maxwell3d.mme_draw.instance_count = 0; +    maxwell3d.CallMethodFromMME(0x8e3, 0x640); +    maxwell3d.CallMethodFromMME(0x8e4, 0x0); +    maxwell3d.CallMethodFromMME(0x8e5, 0x0); +    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; +} +} // namespace + +constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ +    std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), +    std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), +    std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), +}}; + +HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +HLEMacro::~HLEMacro() = default; + +std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { +    const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), +                                 [hash](const auto& pair) { return pair.first == hash; }); +    if (it == hle_funcs.end()) { +        return std::nullopt; +    } +    return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); +} + +HLEMacroImpl::~HLEMacroImpl() = default; + +HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) +    : maxwell3d(maxwell3d), func(func) {} + +void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { +    func(maxwell3d, parameters); +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <memory> +#include <optional> +#include <vector> +#include "common/common_types.h" +#include "video_core/macro/macro.h" + +namespace Tegra { + +namespace Engines { +class Maxwell3D; +} + +using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); + +class HLEMacro { +public: +    explicit HLEMacro(Engines::Maxwell3D& maxwell3d); +    ~HLEMacro(); + +    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; + +private: +    Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacroImpl : public CachedMacro { +public: +    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); +    ~HLEMacroImpl(); + +    void Execute(const std::vector<u32>& parameters, u32 method) override; + +private: +    Engines::Maxwell3D& maxwell3d; +    HLEFunction func; +}; + +} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 5edff27aa..aa5256419 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -11,7 +11,8 @@  MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));  namespace Tegra { -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) +    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}  std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {      return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 389b58989..07292702f 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({      BRANCH_HOLDER,  }); 
-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} +MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) +    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}  std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {      return std::make_unique<MacroJITx64Impl>(maxwell3d, code); @@ -553,7 +554,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {  }  void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { -    auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { +    const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {          // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero          // register.          if (reg == 0) { @@ -561,7 +562,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3          }          mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);      }; -    auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; +    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };      switch (operation) {      case Macro::ResultOperation::IgnoreAndFetch: diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index dbee9f634..ff5505d12 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si      return range == inner_size;  } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, +                              const std::size_t size) const {      std::size_t remaining_size{size}; -    std::size_t page_index{src_addr >> page_bits}; -    std::size_t page_offset{src_addr & 
page_mask}; +    std::size_t page_index{gpu_src_addr >> page_bits}; +    std::size_t page_offset{gpu_src_addr & page_mask};      auto& memory = system.Memory(); @@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s      }  } -void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, +void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,                                      const std::size_t size) const {      std::size_t remaining_size{size}; -    std::size_t page_index{src_addr >> page_bits}; -    std::size_t page_offset{src_addr & page_mask}; +    std::size_t page_index{gpu_src_addr >> page_bits}; +    std::size_t page_offset{gpu_src_addr & page_mask};      auto& memory = system.Memory(); @@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,      }  } -void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, +                               const std::size_t size) {      std::size_t remaining_size{size}; -    std::size_t page_index{dest_addr >> page_bits}; -    std::size_t page_offset{dest_addr & page_mask}; +    std::size_t page_index{gpu_dest_addr >> page_bits}; +    std::size_t page_offset{gpu_dest_addr & page_mask};      auto& memory = system.Memory(); @@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const      }  } -void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, +void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,                                       const std::size_t size) {      std::size_t remaining_size{size}; -    std::size_t page_index{dest_addr >> page_bits}; -    std::size_t page_offset{dest_addr & page_mask}; +    std::size_t page_index{gpu_dest_addr >> page_bits}; +    std::size_t 
page_offset{gpu_dest_addr & page_mask};      auto& memory = system.Memory(); @@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,      }  } -void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, +                              const std::size_t size) {      std::vector<u8> tmp_buffer(size); -    ReadBlock(src_addr, tmp_buffer.data(), size); -    WriteBlock(dest_addr, tmp_buffer.data(), size); +    ReadBlock(gpu_src_addr, tmp_buffer.data(), size); +    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);  } -void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { +void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, +                                    const std::size_t size) {      std::vector<u8> tmp_buffer(size); -    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); -    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); +    ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size); +    WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);  }  bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0ddd52d5a..87658e87a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -79,9 +79,9 @@ public:       * in the Host Memory counterpart. Note: This functions cause Host GPU Memory       * Flushes and Invalidations, respectively to each operation.       
*/ -    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; -    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); -    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); +    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; +    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); +    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);      /**       * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and @@ -93,9 +93,9 @@ public:       * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture       * being flushed.       */ -    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; -    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); -    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); +    void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; +    void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); +    void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);      /**       * IsGranularRange checks if a gpu region can be simply read with a pointer diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,53 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;  MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) +    : 
VideoCommon::BufferBlock{cpu_addr, size} {      gl_buffer.Create();      glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); +    if (device.HasVertexBufferUnifiedMemory()) { +        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); +        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); +    }  }  Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { +    glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), +                         data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { +    MICROPROFILE_SCOPE(OpenGL_Buffer_Download); +    const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); +    const GLintptr gl_offset = static_cast<GLintptr>(offset); +    if (read_buffer.handle == 0) { +        read_buffer.Create(); +        glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, +                          GL_STREAM_READ); +    } +    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); +    glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); +    glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, +                      std::size_t size) { +    glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), +                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +} +  OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, -                               const Device& device, std::size_t stream_size) -    : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { +                               const Device& device_, std::size_t stream_size) 
+    : GenericBufferCache{rasterizer, system, +                         std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, +      device{device_} {      if (!device.HasFastBufferSubData()) {          return;      } -    static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); +    static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);      glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));      for (const GLuint cbuf : cbufs) {          glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,39 +80,20 @@ OGLBufferCache::~OGLBufferCache() {  }  std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { -    return std::make_shared<Buffer>(cpu_addr, size); +    return std::make_shared<Buffer>(device, cpu_addr, size);  } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { -    return 0; -} - -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                     const u8* data) { -    glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), -                         static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                       u8* data) { -    MICROPROFILE_SCOPE(OpenGL_Buffer_Download); -    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); -    glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), -                            static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, -                               std::size_t dst_offset, std::size_t size) { -    glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), -                             static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); 
+OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { +    return {0, 0, 0};  }  OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,                                                               std::size_t size) {      DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));      const GLuint cbuf = cbufs[cbuf_cursor++]; +      glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); -    return {cbuf, 0}; +    return {cbuf, 0, 0};  }  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,28 @@ class RasterizerOpenGL;  class Buffer : public VideoCommon::BufferBlock {  public: -    explicit Buffer(VAddr cpu_addr, const std::size_t size); +    explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);      ~Buffer(); -    GLuint Handle() const { +    void Upload(std::size_t offset, std::size_t size, const u8* data); + +    void Download(std::size_t offset, std::size_t size, u8* data); + +    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, +                  std::size_t size); + +    GLuint Handle() const noexcept {          return gl_buffer.handle;      } +    u64 Address() const noexcept { +        return gpu_address; +    } +  private:      OGLBuffer gl_buffer; +    OGLBuffer read_buffer; +    u64 gpu_address = 0;  };  using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; @@ -43,7 +56,7 @@ public:                              const Device& device, std::size_t stream_size);      ~OGLBufferCache(); -    GLuint GetEmptyBuffer(std::size_t) override; +    BufferInfo GetEmptyBuffer(std::size_t) override;      void Acquire() noexcept {          cbuf_cursor = 0; @@ -52,22 +65,16 @@ public:  
protected:      std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; -    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                         const u8* data) override; - -    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                           u8* data) override; - -    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, -                   std::size_t dst_offset, std::size_t size) override; -      BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;  private: +    static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * +                                             Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + +    const Device& device; +      std::size_t cbuf_cursor = 0; -    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * -                           Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> -        cbufs; +    std::array<GLuint, NUM_CBUFS> cbufs{};  };  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b31d604e4..208fc6167 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -178,7 +178,7 @@ bool IsASTCSupported() {          for (const GLenum format : formats) {              for (const GLenum support : required_support) {                  GLint value; -                glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); +                glGetInternalformativ(target, format, support, 1, &value);                  if (value != GL_FULL_SUPPORT) {                      return false;                  } @@ -193,6 +193,7 @@ bool IsASTCSupported() {  Device::Device()      : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {      const std::string_view 
vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); +    const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));      const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));      const std::vector extensions = GetExtensions(); @@ -216,12 +217,18 @@ Device::Device()      has_shader_ballot = GLAD_GL_ARB_shader_ballot;      has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;      has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); +    has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");      has_astc = IsASTCSupported();      has_variable_aoffi = TestVariableAoffi();      has_component_indexing_bug = is_amd;      has_precise_bug = TestPreciseBug(); -    has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;      has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; +    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + +    // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive +    // uniform buffers as "push constants" +    has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; +      use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&                             GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&                             GLAD_GL_NV_transform_feedback2; @@ -245,6 +252,7 @@ Device::Device(std::nullptr_t) {      has_shader_ballot = true;      has_vertex_viewport_layer = true;      has_image_load_formatted = true; +    has_texture_shadow_lod = true;      has_variable_aoffi = true;  } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 145347943..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -68,6 +68,14 @@ public:       
   return has_image_load_formatted;      } +    bool HasTextureShadowLod() const { +        return has_texture_shadow_lod; +    } + +    bool HasVertexBufferUnifiedMemory() const { +        return has_vertex_buffer_unified_memory; +    } +      bool HasASTC() const {          return has_astc;      } @@ -110,6 +118,8 @@ private:      bool has_shader_ballot{};      bool has_vertex_viewport_layer{};      bool has_image_load_formatted{}; +    bool has_texture_shadow_lod{}; +    bool has_vertex_buffer_unified_memory{};      bool has_astc{};      bool has_variable_aoffi{};      bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =  constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =      NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NumSupportedVertexAttributes = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;  template <typename Engine, typename Entry>  Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {      // avoid OpenGL errors.      // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't      // assume every shader uses them all. 
-    for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { +    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {          if (!flags[Dirty::VertexFormat0 + index]) {              continue;          } @@ -212,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() {          if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||              attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {              glVertexAttribIFormat(gl_index, attrib.ComponentCount(), -                                  MaxwellToGL::VertexType(attrib), attrib.offset); +                                  MaxwellToGL::VertexFormat(attrib), attrib.offset);          } else { -            glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), +            glVertexAttribFormat(gl_index, attrib.ComponentCount(), +                                 MaxwellToGL::VertexFormat(attrib),                                   attrib.IsNormalized() ? 
GL_TRUE : GL_FALSE, attrib.offset);          }          glVertexAttribBinding(gl_index, attrib.buffer); @@ -231,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {      MICROPROFILE_SCOPE(OpenGL_VB); +    const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); +      // Upload all guest vertex arrays sequentially to our buffer      const auto& regs = gpu.regs; -    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { +    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {          if (!flags[Dirty::VertexBuffer0 + index]) {              continue;          } @@ -246,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {          const GPUVAddr start = vertex_array.StartAddress();          const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); -          ASSERT(end >= start); + +        const GLuint gl_index = static_cast<GLuint>(index);          const u64 size = end - start;          if (size == 0) { -            glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); +            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); +            if (use_unified_memory) { +                glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); +            }              continue;          } -        const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); -        glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, -                           vertex_array.stride); +        const auto info = buffer_cache.UploadMemory(start, size); +        if (use_unified_memory) { +            glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); +            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, +                                   info.address + info.offset, size); +        } else { +            glBindVertexBuffer(gl_index, info.handle, info.offset, 
vertex_array.stride); +        }      }  } @@ -268,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() {      flags[Dirty::VertexInstances] = false;      const auto& regs = gpu.regs; -    for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { +    for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {          if (!flags[Dirty::VertexInstance0 + index]) {              continue;          } @@ -285,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {      MICROPROFILE_SCOPE(OpenGL_Index);      const auto& regs = system.GPU().Maxwell3D().regs;      const std::size_t size = CalculateIndexBufferSize(); -    const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); -    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); -    return offset; +    const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); +    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); +    return info.offset;  }  void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {      if (!device.UseAssemblyShaders()) {          MaxwellUniformData ubo;          ubo.SetFromRegs(gpu); -        const auto [buffer, offset] = +        const auto info =              buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); -        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, +        glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,                            static_cast<GLsizeiptr>(sizeof(ubo)));      } @@ -956,8 +969,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,          if (device.UseAssemblyShaders()) {              glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);          } else { -            glBindBufferRange(GL_UNIFORM_BUFFER, binding, -                              
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); +            glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));          }          return;      } @@ -970,24 +982,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,      const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();      const GPUVAddr gpu_addr = buffer.address; -    auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); +    auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);      if (device.UseAssemblyShaders()) {          UNIMPLEMENTED_IF(use_unified); -        if (offset != 0) { +        if (info.offset != 0) {              const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; -            glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); -            cbuf = staging_cbuf; -            offset = 0; +            glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); +            info.handle = staging_cbuf; +            info.offset = 0;          } -        glBindBufferRangeNV(stage, binding, cbuf, offset, size); +        glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);          return;      }      if (use_unified) { -        glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); +        glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, +                                 unified_offset, size);      } else { -        glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); +        glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);      }  } @@ -1023,9 +1036,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {  void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,                                           GPUVAddr gpu_addr, std::size_t size) { 
     const auto alignment{device.GetShaderStorageBufferAlignment()}; -    const auto [ssbo, buffer_offset] = -        buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); -    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, +    const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); +    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,                        static_cast<GLsizeiptr>(size));  } @@ -1712,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() {          const GLuint handle = transform_feedback_buffers[index].handle;          const GPUVAddr gpu_addr = binding.Address();          const std::size_t size = binding.buffer_size; -        const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); -        glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); +        const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); +        glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, +                                 static_cast<GLsizeiptr>(size));      }  } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 46e780a06..c6a3bf3a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {          const u8* host_ptr_b = memory_manager.GetPointer(address_b);          code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);      } +    const std::size_t code_size = code.size() * sizeof(u64); -    const auto unique_identifier = GetUniqueIdentifier( +    const u64 unique_identifier = GetUniqueIdentifier(          GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);      const ShaderParameters 
params{system,    disk_cache, device, @@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {      Shader* const result = shader.get();      if (cpu_addr) { -        Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); +        Register(std::move(shader), *cpu_addr, code_size);      } else {          null_shader = std::move(shader);      } @@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {      const auto host_ptr{memory_manager.GetPointer(code_addr)};      // No kernel found, create a new one -    auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; -    const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; +    ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; +    const std::size_t code_size{code.size() * sizeof(u64)}; +    const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};      const ShaderParameters params{system,    disk_cache, device,                                    *cpu_addr, host_ptr,   unique_identifier}; @@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {      Shader* const result = kernel.get();      if (cpu_addr) { -        Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); +        Register(std::move(kernel), *cpu_addr, code_size);      } else {          null_kernel = std::move(kernel);      } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6848f1388..994aaeaf2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -37,7 +37,6 @@ namespace OpenGL {  class Device;  class RasterizerOpenGL; -struct UnspecializedShader;  using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp 
b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d6e30b321..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;  using Tegra::Shader::IpaSampleMode;  using Tegra::Shader::PixelImap;  using Tegra::Shader::Register; +using Tegra::Shader::TextureType;  using VideoCommon::Shader::BuildTransformFeedback;  using VideoCommon::Shader::Registry; @@ -526,6 +527,9 @@ private:          if (device.HasImageLoadFormatted()) {              code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");          } +        if (device.HasTextureShadowLod()) { +            code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); +        }          if (device.HasWarpIntrinsics()) {              code.AddLine("#extension GL_NV_gpu_shader5 : require");              code.AddLine("#extension GL_NV_shader_thread_group : require"); @@ -909,13 +913,13 @@ private:                      return "samplerBuffer";                  }                  switch (sampler.type) { -                case Tegra::Shader::TextureType::Texture1D: +                case TextureType::Texture1D:                      return "sampler1D"; -                case Tegra::Shader::TextureType::Texture2D: +                case TextureType::Texture2D:                      return "sampler2D"; -                case Tegra::Shader::TextureType::Texture3D: +                case TextureType::Texture3D:                      return "sampler3D"; -                case Tegra::Shader::TextureType::TextureCube: +                case TextureType::TextureCube:                      return "samplerCube";                  default:                      UNREACHABLE(); @@ -1380,8 +1384,19 @@ private:          const std::size_t count = operation.GetOperandsCount();          const bool has_array = meta->sampler.is_array;          const bool has_shadow = meta->sampler.is_shadow; +      
  const bool workaround_lod_array_shadow_as_grad = +            !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && +            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || +             meta->sampler.type == TextureType::TextureCube); + +        std::string expr = "texture"; + +        if (workaround_lod_array_shadow_as_grad) { +            expr += "Grad"; +        } else { +            expr += function_suffix; +        } -        std::string expr = "texture" + function_suffix;          if (!meta->aoffi.empty()) {              expr += "Offset";          } else if (!meta->ptp.empty()) { @@ -1415,6 +1430,16 @@ private:              expr += ')';          } +        if (workaround_lod_array_shadow_as_grad) { +            switch (meta->sampler.type) { +            case TextureType::Texture2D: +                return expr + ", vec2(0.0), vec2(0.0))"; +            case TextureType::TextureCube: +                return expr + ", vec3(0.0), vec3(0.0))"; +            } +            UNREACHABLE(); +        } +          for (const auto& variant : extras) {              if (const auto argument = std::get_if<TextureArgument>(&variant)) {                  expr += GenerateTextureArgument(*argument); @@ -2041,8 +2066,19 @@ private:          const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());          ASSERT(meta); -        std::string expr = GenerateTexture( -            operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); +        std::string expr{}; + +        if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && +            ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || +             meta->sampler.type == TextureType::TextureCube)) { +            LOG_ERROR(Render_OpenGL, +                      "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); +            expr = GenerateTexture(operation, "Lod", {}); +  
      } else { +            expr = GenerateTexture(operation, "Lod", +                                   {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); +        } +          if (meta->sampler.is_shadow) {              expr = "vec4(" + expr + ')';          } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,11 +2,13 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -#include <deque> +#include <tuple>  #include <vector> +  #include "common/alignment.h"  #include "common/assert.h"  #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_stream_buffer.h"  MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",  namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, -                                 bool use_persistent) +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)      : buffer_size(size) {      gl_buffer.Create(); @@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p          allocate_size *= 2;      } -    if (use_persistent) { -        persistent = true; -        coherent = prefer_coherent; -        const GLbitfield flags = -            GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); -        glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); -        mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( -            gl_buffer.handle, 0, buffer_size, flags | (coherent ? 
0 : GL_MAP_FLUSH_EXPLICIT_BIT))); -    } else { -        glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); +    static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; +    glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); +    mapped_ptr = static_cast<u8*>( +        glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + +    if (device.HasVertexBufferUnifiedMemory()) { +        glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); +        glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);      }  }  OGLStreamBuffer::~OGLStreamBuffer() { -    if (persistent) { -        glUnmapNamedBuffer(gl_buffer.handle); -    } +    glUnmapNamedBuffer(gl_buffer.handle);      gl_buffer.Release();  } @@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a      bool invalidate = false;      if (buffer_pos + size > buffer_size) { +        MICROPROFILE_SCOPE(OpenGL_StreamBuffer); +        glInvalidateBufferData(gl_buffer.handle); +          buffer_pos = 0;          invalidate = true; - -        if (persistent) { -            glUnmapNamedBuffer(gl_buffer.handle); -        }      } -    if (invalidate || !persistent) { -        MICROPROFILE_SCOPE(OpenGL_StreamBuffer); -        GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | -                           (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | -                           (invalidate ? 
GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); -        mapped_ptr = static_cast<u8*>( -            glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); -        mapped_offset = buffer_pos; -    } - -    return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); +    return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);  }  void OGLStreamBuffer::Unmap(GLsizeiptr size) {      ASSERT(size <= mapped_size); -    if (!coherent && size > 0) { -        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); -    } - -    if (!persistent) { -        glUnmapNamedBuffer(gl_buffer.handle); +    if (size > 0) { +        glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);      }      buffer_pos += size; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,10 +11,11 @@  namespace OpenGL { +class Device; +  class OGLStreamBuffer : private NonCopyable {  public: -    explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, -                             bool use_persistent = true); +    explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);      ~OGLStreamBuffer();      /* @@ -33,19 +34,20 @@ public:          return gl_buffer.handle;      } -    GLsizeiptr Size() const { +    u64 Address() const { +        return gpu_address; +    } + +    GLsizeiptr Size() const noexcept {          return buffer_size;      }  private:      OGLBuffer gl_buffer; -    bool coherent = false; -    bool persistent = false; - +    GLuint64EXT gpu_address = 0;      GLintptr buffer_pos = 0;      GLsizeiptr buffer_size = 0; -    GLintptr mapped_offset = 0;      GLsizeiptr mapped_size = 0;   
   u8* mapped_ptr = nullptr;  }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,10 +24,11 @@ namespace MaxwellToGL {  using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {      switch (attrib.type) { -    case Maxwell::VertexAttribute::Type::UnsignedInt:      case Maxwell::VertexAttribute::Type::UnsignedNorm: +    case Maxwell::VertexAttribute::Type::UnsignedScaled: +    case Maxwell::VertexAttribute::Type::UnsignedInt:          switch (attrib.size) {          case Maxwell::VertexAttribute::Size::Size_8:          case Maxwell::VertexAttribute::Size::Size_8_8: @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {              return GL_UNSIGNED_INT_2_10_10_10_REV;          }          break; -    case Maxwell::VertexAttribute::Type::SignedInt:      case Maxwell::VertexAttribute::Type::SignedNorm: +    case Maxwell::VertexAttribute::Type::SignedScaled: +    case Maxwell::VertexAttribute::Type::SignedInt:          switch (attrib.size) {          case Maxwell::VertexAttribute::Size::Size_8:          case Maxwell::VertexAttribute::Size::Size_8_8: @@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {              return GL_FLOAT;          }          break; -    case Maxwell::VertexAttribute::Type::UnsignedScaled: -        switch (attrib.size) { -        case Maxwell::VertexAttribute::Size::Size_8: -        case Maxwell::VertexAttribute::Size::Size_8_8: -        case Maxwell::VertexAttribute::Size::Size_8_8_8: -        case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return GL_UNSIGNED_BYTE; -        case Maxwell::VertexAttribute::Size::Size_16: -        case 
Maxwell::VertexAttribute::Size::Size_16_16: -        case Maxwell::VertexAttribute::Size::Size_16_16_16: -        case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return GL_UNSIGNED_SHORT; -        } -        break; -    case Maxwell::VertexAttribute::Type::SignedScaled: -        switch (attrib.size) { -        case Maxwell::VertexAttribute::Size::Size_8: -        case Maxwell::VertexAttribute::Size::Size_8_8: -        case Maxwell::VertexAttribute::Size::Size_8_8_8: -        case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return GL_BYTE; -        case Maxwell::VertexAttribute::Size::Size_16: -        case Maxwell::VertexAttribute::Size::Size_16_16: -        case Maxwell::VertexAttribute::Size::Size_16_16_16: -        case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return GL_SHORT; -        } -        break;      } -    UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), +    UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),                        attrib.SizeString());      return {};  } @@ -217,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {          } else {              return GL_MIRROR_CLAMP_TO_EDGE;          } +    case Tegra::Texture::WrapMode::MirrorOnceClampOGL: +        if (GL_EXT_texture_mirror_clamp) { +            return GL_MIRROR_CLAMP_EXT; +        } else { +            return GL_MIRROR_CLAMP_TO_EDGE; +        }      }      UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));      return GL_REPEAT; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {      // Clear screen to black      
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + +    // Enable unified vertex attributes and query vertex buffer address when the driver supports it +    if (device.HasVertexBufferUnifiedMemory()) { +        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + +        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); +        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, +                                         &vertex_buffer_address); +    }  }  void RendererOpenGL::AddTelemetryFields() { @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {                           offsetof(ScreenRectVertex, tex_coord));      glVertexAttribBinding(PositionLocation, 0);      glVertexAttribBinding(TexCoordLocation, 0); -    glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); +    if (device.HasVertexBufferUnifiedMemory()) { +        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); +        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, +                               sizeof(vertices)); +    } else { +        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); +    }      glBindTextureUnit(0, screen_info.display_texture);      glBindSampler(0, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -107,6 +107,9 @@ private:      OGLPipeline pipeline;      OGLFramebuffer screenshot_framebuffer; +    // GPU address of the vertex buffer +    GLuint64EXT vertex_buffer_address = 0; +      /// Display information for Switch screen      ScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 
1f2b6734b..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,  VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {      switch (type) { +    case Maxwell::VertexAttribute::Type::UnsignedNorm: +        switch (size) { +        case Maxwell::VertexAttribute::Size::Size_8: +            return VK_FORMAT_R8_UNORM; +        case Maxwell::VertexAttribute::Size::Size_8_8: +            return VK_FORMAT_R8G8_UNORM; +        case Maxwell::VertexAttribute::Size::Size_8_8_8: +            return VK_FORMAT_R8G8B8_UNORM; +        case Maxwell::VertexAttribute::Size::Size_8_8_8_8: +            return VK_FORMAT_R8G8B8A8_UNORM; +        case Maxwell::VertexAttribute::Size::Size_16: +            return VK_FORMAT_R16_UNORM; +        case Maxwell::VertexAttribute::Size::Size_16_16: +            return VK_FORMAT_R16G16_UNORM; +        case Maxwell::VertexAttribute::Size::Size_16_16_16: +            return VK_FORMAT_R16G16B16_UNORM; +        case Maxwell::VertexAttribute::Size::Size_16_16_16_16: +            return VK_FORMAT_R16G16B16A16_UNORM; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return VK_FORMAT_A2B10G10R10_UNORM_PACK32; +        } +        break;      case Maxwell::VertexAttribute::Type::SignedNorm:          switch (size) {          case Maxwell::VertexAttribute::Size::Size_8: @@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib              return VK_FORMAT_R16G16B16A16_SNORM;          case Maxwell::VertexAttribute::Size::Size_10_10_10_2:              return VK_FORMAT_A2B10G10R10_SNORM_PACK32; -        default: -            break;          }          break; -    case Maxwell::VertexAttribute::Type::UnsignedNorm: +    case Maxwell::VertexAttribute::Type::UnsignedScaled:          
switch (size) {          case Maxwell::VertexAttribute::Size::Size_8: -            return VK_FORMAT_R8_UNORM; +            return VK_FORMAT_R8_USCALED;          case Maxwell::VertexAttribute::Size::Size_8_8: -            return VK_FORMAT_R8G8_UNORM; +            return VK_FORMAT_R8G8_USCALED;          case Maxwell::VertexAttribute::Size::Size_8_8_8: -            return VK_FORMAT_R8G8B8_UNORM; +            return VK_FORMAT_R8G8B8_USCALED;          case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return VK_FORMAT_R8G8B8A8_UNORM; +            return VK_FORMAT_R8G8B8A8_USCALED;          case Maxwell::VertexAttribute::Size::Size_16: -            return VK_FORMAT_R16_UNORM; +            return VK_FORMAT_R16_USCALED;          case Maxwell::VertexAttribute::Size::Size_16_16: -            return VK_FORMAT_R16G16_UNORM; +            return VK_FORMAT_R16G16_USCALED;          case Maxwell::VertexAttribute::Size::Size_16_16_16: -            return VK_FORMAT_R16G16B16_UNORM; +            return VK_FORMAT_R16G16B16_USCALED;          case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return VK_FORMAT_R16G16B16A16_UNORM; +            return VK_FORMAT_R16G16B16A16_USCALED;          case Maxwell::VertexAttribute::Size::Size_10_10_10_2: -            return VK_FORMAT_A2B10G10R10_UNORM_PACK32; -        default: -            break; +            return VK_FORMAT_A2B10G10R10_USCALED_PACK32;          }          break; -    case Maxwell::VertexAttribute::Type::SignedInt: +    case Maxwell::VertexAttribute::Type::SignedScaled:          switch (size) {          case Maxwell::VertexAttribute::Size::Size_8: -            return VK_FORMAT_R8_SINT; +            return VK_FORMAT_R8_SSCALED;          case Maxwell::VertexAttribute::Size::Size_8_8: -            return VK_FORMAT_R8G8_SINT; +            return VK_FORMAT_R8G8_SSCALED;          case Maxwell::VertexAttribute::Size::Size_8_8_8: -            return VK_FORMAT_R8G8B8_SINT; +            return 
VK_FORMAT_R8G8B8_SSCALED;          case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return VK_FORMAT_R8G8B8A8_SINT; +            return VK_FORMAT_R8G8B8A8_SSCALED;          case Maxwell::VertexAttribute::Size::Size_16: -            return VK_FORMAT_R16_SINT; +            return VK_FORMAT_R16_SSCALED;          case Maxwell::VertexAttribute::Size::Size_16_16: -            return VK_FORMAT_R16G16_SINT; +            return VK_FORMAT_R16G16_SSCALED;          case Maxwell::VertexAttribute::Size::Size_16_16_16: -            return VK_FORMAT_R16G16B16_SINT; +            return VK_FORMAT_R16G16B16_SSCALED;          case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return VK_FORMAT_R16G16B16A16_SINT; -        case Maxwell::VertexAttribute::Size::Size_32: -            return VK_FORMAT_R32_SINT; -        case Maxwell::VertexAttribute::Size::Size_32_32: -            return VK_FORMAT_R32G32_SINT; -        case Maxwell::VertexAttribute::Size::Size_32_32_32: -            return VK_FORMAT_R32G32B32_SINT; -        case Maxwell::VertexAttribute::Size::Size_32_32_32_32: -            return VK_FORMAT_R32G32B32A32_SINT; -        default: -            break; +            return VK_FORMAT_R16G16B16A16_SSCALED; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;          }          break;      case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib              return VK_FORMAT_R32G32B32_UINT;          case Maxwell::VertexAttribute::Size::Size_32_32_32_32:              return VK_FORMAT_R32G32B32A32_UINT; -        default: -            break; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return VK_FORMAT_A2B10G10R10_UINT_PACK32;          }          break; -    case Maxwell::VertexAttribute::Type::UnsignedScaled: +    case Maxwell::VertexAttribute::Type::SignedInt:   
       switch (size) {          case Maxwell::VertexAttribute::Size::Size_8: -            return VK_FORMAT_R8_USCALED; +            return VK_FORMAT_R8_SINT;          case Maxwell::VertexAttribute::Size::Size_8_8: -            return VK_FORMAT_R8G8_USCALED; +            return VK_FORMAT_R8G8_SINT;          case Maxwell::VertexAttribute::Size::Size_8_8_8: -            return VK_FORMAT_R8G8B8_USCALED; +            return VK_FORMAT_R8G8B8_SINT;          case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return VK_FORMAT_R8G8B8A8_USCALED; +            return VK_FORMAT_R8G8B8A8_SINT;          case Maxwell::VertexAttribute::Size::Size_16: -            return VK_FORMAT_R16_USCALED; +            return VK_FORMAT_R16_SINT;          case Maxwell::VertexAttribute::Size::Size_16_16: -            return VK_FORMAT_R16G16_USCALED; +            return VK_FORMAT_R16G16_SINT;          case Maxwell::VertexAttribute::Size::Size_16_16_16: -            return VK_FORMAT_R16G16B16_USCALED; +            return VK_FORMAT_R16G16B16_SINT;          case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return VK_FORMAT_R16G16B16A16_USCALED; -        default: -            break; +            return VK_FORMAT_R16G16B16A16_SINT; +        case Maxwell::VertexAttribute::Size::Size_32: +            return VK_FORMAT_R32_SINT; +        case Maxwell::VertexAttribute::Size::Size_32_32: +            return VK_FORMAT_R32G32_SINT; +        case Maxwell::VertexAttribute::Size::Size_32_32_32: +            return VK_FORMAT_R32G32B32_SINT; +        case Maxwell::VertexAttribute::Size::Size_32_32_32_32: +            return VK_FORMAT_R32G32B32A32_SINT; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return VK_FORMAT_A2B10G10R10_SINT_PACK32;          }          break; -    case Maxwell::VertexAttribute::Type::SignedScaled: +    case Maxwell::VertexAttribute::Type::Float:          switch (size) { -        case Maxwell::VertexAttribute::Size::Size_8: -           
 return VK_FORMAT_R8_SSCALED; -        case Maxwell::VertexAttribute::Size::Size_8_8: -            return VK_FORMAT_R8G8_SSCALED; -        case Maxwell::VertexAttribute::Size::Size_8_8_8: -            return VK_FORMAT_R8G8B8_SSCALED; -        case Maxwell::VertexAttribute::Size::Size_8_8_8_8: -            return VK_FORMAT_R8G8B8A8_SSCALED;          case Maxwell::VertexAttribute::Size::Size_16: -            return VK_FORMAT_R16_SSCALED; +            return VK_FORMAT_R16_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_16_16: -            return VK_FORMAT_R16G16_SSCALED; +            return VK_FORMAT_R16G16_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_16_16_16: -            return VK_FORMAT_R16G16B16_SSCALED; +            return VK_FORMAT_R16G16B16_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return VK_FORMAT_R16G16B16A16_SSCALED; -        default: -            break; -        } -        break; -    case Maxwell::VertexAttribute::Type::Float: -        switch (size) { +            return VK_FORMAT_R16G16B16A16_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_32:              return VK_FORMAT_R32_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_32_32: @@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib              return VK_FORMAT_R32G32B32_SFLOAT;          case Maxwell::VertexAttribute::Size::Size_32_32_32_32:              return VK_FORMAT_R32G32B32A32_SFLOAT; -        case Maxwell::VertexAttribute::Size::Size_16: -            return VK_FORMAT_R16_SFLOAT; -        case Maxwell::VertexAttribute::Size::Size_16_16: -            return VK_FORMAT_R16G16_SFLOAT; -        case Maxwell::VertexAttribute::Size::Size_16_16_16: -            return VK_FORMAT_R16G16B16_SFLOAT; -        case Maxwell::VertexAttribute::Size::Size_16_16_16_16: -            return VK_FORMAT_R16G16B16A16_SFLOAT; -        default: -            break;          }          
break;      } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cd9673d1f..2d9b18ed9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc          }      } -    static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; -    vk::Span<const char*> layers = layers_data; -    if (!enable_layers) { -        layers = {}; +    std::vector<const char*> layers; +    layers.reserve(1); +    if (enable_layers) { +        layers.push_back("VK_LAYER_KHRONOS_validation"); +    } + +    const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); +    if (!layer_properties) { +        LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); +        layers.clear(); +    } + +    for (auto layer_it = layers.begin(); layer_it != layers.end();) { +        const char* const layer = *layer_it; +        const auto it = std::find_if( +            layer_properties->begin(), layer_properties->end(), +            [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); +        if (it == layer_properties->end()) { +            LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); +            layer_it = layers.erase(layer_it); +        } else { +            ++layer_it; +        }      } +      vk::Instance instance = vk::Instance::Create(layers, extensions, dld);      if (!instance) {          LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1fde38328..2be38d419 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -37,9 +37,9 @@ 
std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch  } // Anonymous namespace -Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, -               std::size_t size) -    : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, +               VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) +    : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {      VkBufferCreateInfo ci;      ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;      ci.pNext = nullptr; @@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp  Buffer::~Buffer() = default; -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, -                             const VKDevice& device, VKMemoryManager& memory_manager, -                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool) -    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, -                                                                 CreateStreamBuffer(device, -                                                                                    scheduler)}, -      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ -                                                                                staging_pool} {} - -VKBufferCache::~VKBufferCache() = default; - -std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { -    return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); -} - -VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { -    size = std::max(size, std::size_t(4)); -    const auto& empty = staging_pool.GetUnusedBuffer(size, false); -    scheduler.RequestOutsideRenderPassOperationContext(); -    
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { -        cmdbuf.FillBuffer(buffer, 0, size, 0); -    }); -    return *empty.handle; -} - -void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                    const u8* data) { +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {      const auto& staging = staging_pool.GetUnusedBuffer(size, true);      std::memcpy(staging.commit->Map(size), data, size);      scheduler.RequestOutsideRenderPassOperationContext(); -    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, -                      size](vk::CommandBuffer cmdbuf) { -        cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); + +    const VkBuffer handle = Handle(); +    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { +        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});          VkBufferMemoryBarrier barrier;          barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st          barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;          barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;          barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; -        barrier.buffer = buffer; +        barrier.buffer = handle;          barrier.offset = offset;          barrier.size = size;          cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, @@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st      });  } -void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                                      u8* data) { +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {      const auto& staging = 
staging_pool.GetUnusedBuffer(size, true);      scheduler.RequestOutsideRenderPassOperationContext(); -    scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, -                      size](vk::CommandBuffer cmdbuf) { + +    const VkBuffer handle = Handle(); +    scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {          VkBufferMemoryBarrier barrier;          barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;          barrier.pNext = nullptr; @@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,          barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;          barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;          barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; -        barrier.buffer = buffer; +        barrier.buffer = handle;          barrier.offset = offset;          barrier.size = size; @@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,                                     VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |                                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,                                 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); -        cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); +        cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});      });      scheduler.Finish();      std::memcpy(data, staging.commit->Map(size), size);  } -void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, -                              std::size_t dst_offset, std::size_t size) { +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, +                      std::size_t size) {      scheduler.RequestOutsideRenderPassOperationContext(); -    scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, + +    
const VkBuffer dst_buffer = Handle(); +    scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,                        size](vk::CommandBuffer cmdbuf) {          cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); @@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t      });  } +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, +                             const VKDevice& device, VKMemoryManager& memory_manager, +                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool) +    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, +                                                                 CreateStreamBuffer(device, +                                                                                    scheduler)}, +      device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ +                                                                                staging_pool} {} + +VKBufferCache::~VKBufferCache() = default; + +std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { +    return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr, +                                    size); +} + +VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { +    size = std::max(size, std::size_t(4)); +    const auto& empty = staging_pool.GetUnusedBuffer(size, false); +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { +        cmdbuf.FillBuffer(buffer, 0, size, 0); +    }); +    return {*empty.handle, 0, 0}; +} +  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 9ebbef835..991ee451c 
100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -25,15 +25,29 @@ class VKScheduler;  class Buffer final : public VideoCommon::BufferBlock {  public: -    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, -                    std::size_t size); +    explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, +                    VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);      ~Buffer(); +    void Upload(std::size_t offset, std::size_t size, const u8* data); + +    void Download(std::size_t offset, std::size_t size, u8* data); + +    void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, +                  std::size_t size); +      VkBuffer Handle() const {          return *buffer.handle;      } +    u64 Address() const { +        return 0; +    } +  private: +    VKScheduler& scheduler; +    VKStagingBufferPool& staging_pool; +      VKBuffer buffer;  }; @@ -44,20 +58,11 @@ public:                             VKScheduler& scheduler, VKStagingBufferPool& staging_pool);      ~VKBufferCache(); -    VkBuffer GetEmptyBuffer(std::size_t size) override; +    BufferInfo GetEmptyBuffer(std::size_t size) override;  protected:      std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; -    void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                         const u8* data) override; - -    void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, -                           u8* data) override; - -    void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, -                   std::size_t dst_offset, std::size_t size) override; -  private:      const VKDevice& device;      VKMemoryManager& memory_manager; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a77fa35c3..a8d94eac3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry      }  } +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { +    if (!is_clear) { +        return true; +    } +    // First we have to make sure all clear masks are enabled. +    if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || +        !regs.clear_buffers.A) { +        return true; +    } +    // If scissors are disabled, the whole screen is cleared +    if (!regs.clear_flags.scissor) { +        return false; +    } +    // Then we have to confirm scissor testing clears the whole image +    const std::size_t index = regs.clear_buffers.RT; +    const auto& scissor = regs.scissor_test[0]; +    return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || +           scissor.max_y < regs.rt[index].height; +} + +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { +    // If we are not clearing, the contents have to be preserved +    if (!is_clear) { +        return true; +    } +    // For depth stencil clears we only have to confirm scissor test covers the whole image +    if (!regs.clear_flags.scissor) { +        return false; +    } +    // Make sure the clear covers the whole image +    const auto& scissor = regs.scissor_test[0]; +    return 
scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || +           scissor.max_y < regs.zeta_height; +} +  } // Anonymous namespace  class BufferBindings final { @@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      buffer_cache.Unmap(); -    const Texceptions texceptions = UpdateAttachments(); +    const Texceptions texceptions = UpdateAttachments(false);      SetupImageTransitions(texceptions, color_attachments, zeta_attachment);      key.renderpass_params = GetRenderPassParams(texceptions); @@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() {          return;      } -    [[maybe_unused]] const auto texceptions = UpdateAttachments(); +    [[maybe_unused]] const auto texceptions = UpdateAttachments(true);      DEBUG_ASSERT(texceptions.none());      SetupImageTransitions(0, color_attachments, zeta_attachment); @@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() {      draw_counter = 0;  } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {      MICROPROFILE_SCOPE(Vulkan_RenderTargets); -    auto& dirty = system.GPU().Maxwell3D().dirty.flags; +    auto& maxwell3d = system.GPU().Maxwell3D(); +    auto& dirty = maxwell3d.dirty.flags; +    auto& regs = maxwell3d.regs; +      const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];      dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {      Texceptions texceptions;      for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {          if (update_rendertargets) { -            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); +            const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); +            color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);          }       
   if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {              texceptions[rt] = true; @@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {      }      if (update_rendertargets) { -        zeta_attachment = texture_cache.GetDepthBufferSurface(true); +        const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); +        zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);      }      if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {          texceptions[ZETA_TEXCEPTION_INDEX] = true; @@ -870,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() {      UNIMPLEMENTED_IF(binding.buffer_offset != 0);      const GPUVAddr gpu_addr = binding.Address(); -    const auto size = static_cast<VkDeviceSize>(binding.buffer_size); -    const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); +    const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); +    const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); -    scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { +    scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {          cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);          cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);      }); @@ -925,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex              buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);              continue;          } -        const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); -        buffer_bindings.AddVertexBinding(buffer, offset); +        const auto info = buffer_cache.UploadMemory(start, size); +        buffer_bindings.AddVertexBinding(info.handle, info.offset);      }  } @@ -948,7 +996,9 @@ void 
RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar              break;          }          const GPUVAddr gpu_addr = regs.index_array.IndexStart(); -        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); +        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); +        VkBuffer buffer = info.handle; +        u64 offset = info.offset;          std::tie(buffer, offset) = quad_indexed_pass.Assemble(              regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); @@ -962,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar              break;          }          const GPUVAddr gpu_addr = regs.index_array.IndexStart(); -        auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); +        const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); +        VkBuffer buffer = info.handle; +        u64 offset = info.offset;          auto format = regs.index_array.format;          const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; @@ -1109,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,          Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));      ASSERT(size <= MaxConstbufferSize); -    const auto [buffer_handle, offset] = +    const auto info =          buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); - -    update_descriptor_queue.AddBuffer(buffer_handle, offset, size); +    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);  }  void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { @@ -1126,14 +1177,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd          // Note: Do *not* use DefaultBuffer() here, storage buffers can be written 
breaking the          // default buffer.          static constexpr std::size_t dummy_size = 4; -        const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); -        update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); +        const auto info = buffer_cache.GetEmptyBuffer(dummy_size); +        update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);          return;      } -    const auto [buffer, offset] = buffer_cache.UploadMemory( +    const auto info = buffer_cache.UploadMemory(          actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); -    update_descriptor_queue.AddBuffer(buffer, offset, size); +    update_descriptor_queue.AddBuffer(info.handle, info.offset, size);  }  void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, @@ -1154,7 +1205,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu      const auto sampler = sampler_cache.GetSampler(texture.tsc);      update_descriptor_queue.AddSampledImage(sampler, image_view); -    const auto image_layout = update_descriptor_queue.GetLastImageLayout(); +    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();      *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;      sampled_views.push_back(ImageView{std::move(view), image_layout});  } @@ -1180,7 +1231,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima          view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);      update_descriptor_queue.AddImage(image_view); -    const auto image_layout = update_descriptor_queue.GetLastImageLayout(); +    VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();      *image_layout = VK_IMAGE_LAYOUT_GENERAL;      image_views.push_back(ImageView{std::move(view), image_layout});  } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 
c8c187606..83e00e7e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -159,7 +159,10 @@ private:      void FlushWork(); -    Texceptions UpdateAttachments(); +    /// @brief Updates the currently bound attachments +    /// @param is_clear True when the framebuffer is updated as a clear +    /// @return Bitfield of attachments being used as sampled textures +    Texceptions UpdateAttachments(bool is_clear);      std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@  #include <utility>  #include "common/microprofile.h" +#include "common/thread.h"  #include "video_core/renderer_vulkan/vk_device.h"  #include "video_core/renderer_vulkan/vk_query_cache.h"  #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {  }  void VKScheduler::WorkerThread() { +    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);      std::unique_lock lock{mutex};      do {          cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index c765c60a0..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,10 +35,14 @@ public:      /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.      
void Unmap(u64 size); -    VkBuffer Handle() const { +    VkBuffer Handle() const noexcept {          return *buffer;      } +    u64 Address() const noexcept { +        return 0; +    } +  private:      struct Watch final {          VKFenceWatch fence; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 681ecde98..351c048d2 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {  }  void VKUpdateDescriptorQueue::Acquire() { -    entries.clear(); -} +    // Minimum number of entries required. +    // This is the maximum number of entries a single draw call migth use. +    static constexpr std::size_t MIN_ENTRIES = 0x400; -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, -                                   VkDescriptorSet set) { -    if (payload.size() + entries.size() >= payload.max_size()) { +    if (payload.size() + MIN_ENTRIES >= payload.max_size()) {          LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");          scheduler.WaitWorker();          payload.clear();      } +    upload_start = &*payload.end(); +} -    // TODO(Rodrigo): Rework to write the payload directly -    const auto payload_start = payload.data() + payload.size(); -    for (const auto& entry : entries) { -        if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { -            payload.push_back(*image); -        } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { -            payload.push_back(*buffer); -        } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { -            payload.push_back(*texel); -        } else { -            UNREACHABLE(); -        } -    } - -    scheduler.Record( -        [payload_start, set, update_template, logical = 
&device.GetLogical()](vk::CommandBuffer) { -            logical->UpdateDescriptorSet(set, update_template, payload_start); -        }); +void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, +                                   VkDescriptorSet set) { +    const void* const data = upload_start; +    const vk::Device* const logical = &device.GetLogical(); +    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { +        logical->UpdateDescriptorSet(set, update_template, data); +    });  }  } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index cc7e3dff4..945320c72 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -15,17 +15,13 @@ namespace Vulkan {  class VKDevice;  class VKScheduler; -class DescriptorUpdateEntry { -public: -    explicit DescriptorUpdateEntry() {} - -    DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} +struct DescriptorUpdateEntry { +    DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} -    DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} +    DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} -    DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} +    DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} -private:      union {          VkDescriptorImageInfo image;          VkDescriptorBufferInfo buffer; @@ -45,32 +41,34 @@ public:      void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);      void AddSampledImage(VkSampler sampler, VkImageView image_view) { -        entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); +        payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});      }      void AddImage(VkImageView 
image_view) { -        entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); +        payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});      }      void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { -        entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); +        payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});      }      void AddTexelBuffer(VkBufferView texel_buffer) { -        entries.emplace_back(texel_buffer); +        payload.emplace_back(texel_buffer);      } -    VkImageLayout* GetLastImageLayout() { -        return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; +    VkImageLayout* LastImageLayout() { +        return &payload.back().image.imageLayout;      } -private: -    using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; +    const VkImageLayout* LastImageLayout() const { +        return &payload.back().image.imageLayout; +    } +private:      const VKDevice& device;      VKScheduler& scheduler; -    boost::container::static_vector<Variant, 0x400> entries; +    const DescriptorUpdateEntry* upload_start = nullptr;      boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;  }; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 2ce9b0626..0d485a662 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {  bool Load(InstanceDispatch& dld) noexcept {  #define X(name) Proc(dld.name, dld, #name) -    return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); +    return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && +           X(vkEnumerateInstanceLayerProperties);  #undef X  } @@ -725,8 +726,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s      return 
supported == VK_TRUE;  } -VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const -    noexcept { +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {      VkSurfaceCapabilitiesKHR capabilities;      Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));      return capabilities; @@ -771,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp      return properties;  } +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( +    const InstanceDispatch& dld) { +    u32 num; +    if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { +        return std::nullopt; +    } +    std::vector<VkLayerProperties> properties(num); +    if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { +        return std::nullopt; +    } +    return properties; +} +  } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 98937a77a..d56fdb3f9 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -141,6 +141,7 @@ struct InstanceDispatch {      PFN_vkCreateInstance vkCreateInstance;      PFN_vkDestroyInstance vkDestroyInstance;      PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; +    PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;      PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;      PFN_vkCreateDevice vkCreateDevice; @@ -779,7 +780,7 @@ public:      bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; -    VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; +    VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;      std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) 
const; @@ -996,4 +997,7 @@ private:  std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(      const InstanceDispatch& dld); +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( +    const InstanceDispatch& dld); +  } // namespace Vulkan::vk diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 848e46874..b2e88fa20 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -13,55 +13,101 @@  namespace VideoCommon::Shader { +using std::move;  using Tegra::Shader::Instruction;  using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition;  u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {      const Instruction instr = {program_code[pc]};      const auto opcode = OpCode::Decode(instr); -    if (instr.hset2.ftz == 0) { -        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); +    PredCondition cond; +    bool bf; +    bool ftz; +    bool neg_a; +    bool abs_a; +    bool neg_b; +    bool abs_b; +    switch (opcode->get().GetId()) { +    case OpCode::Id::HSET2_C: +    case OpCode::Id::HSET2_IMM: +        cond = instr.hsetp2.cbuf_and_imm.cond; +        bf = instr.Bit(53); +        ftz = instr.Bit(54); +        neg_a = instr.Bit(43); +        abs_a = instr.Bit(44); +        neg_b = instr.Bit(56); +        abs_b = instr.Bit(54); +        break; +    case OpCode::Id::HSET2_R: +        cond = instr.hsetp2.reg.cond; +        bf = instr.Bit(49); +        ftz = instr.Bit(50); +        neg_a = instr.Bit(43); +        abs_a = instr.Bit(44); +        neg_b = instr.Bit(31); +        abs_b = instr.Bit(30); +        break; +    default: +        UNREACHABLE();      } -    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); -    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - -    Node op_b = [&]() { +    Node op_b = [this, instr, 
opcode] {          switch (opcode->get().GetId()) { +        case OpCode::Id::HSET2_C: +            // Inform as unimplemented as this is not tested. +            UNIMPLEMENTED_MSG("HSET2_C is not implemented"); +            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());          case OpCode::Id::HSET2_R:              return GetRegister(instr.gpr20); +        case OpCode::Id::HSET2_IMM: +            return UnpackHalfImmediate(instr, true);          default:              UNREACHABLE(); -            return Immediate(0); +            return Node{};          }      }(); -    op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); -    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); -    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); +    if (!ftz) { +        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); +    } + +    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); +    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); + +    switch (opcode->get().GetId()) { +    case OpCode::Id::HSET2_R: +        op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); +        [[fallthrough]]; +    case OpCode::Id::HSET2_C: +        op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); +        break; +    default: +        break; +    } -    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); +    Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + +    Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);      const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);      // HSET2 operates on each half float in the pack.      std::array<Node, 2> values;      for (u32 i = 0; i < 2; ++i) { -        const u32 raw_value = instr.hset2.bf ? 
0x3c00 : 0xffff; -        const Node true_value = Immediate(raw_value << (i * 16)); -        const Node false_value = Immediate(0); - -        const Node comparison = -            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); -        const Node predicate = Operation(combiner, comparison, second_pred); +        const u32 raw_value = bf ? 0x3c00 : 0xffff; +        Node true_value = Immediate(raw_value << (i * 16)); +        Node false_value = Immediate(0); +        Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); +        Node predicate = Operation(combiner, comparison, second_pred);          values[i] = -            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); +            Operation(OperationCode::Select, predicate, move(true_value), move(false_value));      } -    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); -    SetRegister(bb, instr.gpr0, value); +    Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); +    SetRegister(bb, instr.gpr0, move(value));      return pc;  } diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 60b6ad72a..07778dc3e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,          break;      case TextureFormat::B5G6R5:      case TextureFormat::B6G5R5: +    case TextureFormat::BF10GF11RF11:          if (component == 0) {              return descriptor.b_type;          } @@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,          }          break;      } -    UNIMPLEMENTED_MSG("texture format not implement={}", format); +    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);      return ComponentType::FLOAT;  } @@ -191,6 +192,14 @@ u32 
GetComponentSize(TextureFormat format, std::size_t component) {              return 6;          }          return 0; +    case TextureFormat::BF10GF11RF11: +        if (component == 1 || component == 2) { +            return 11; +        } +        if (component == 0) { +            return 10; +        } +        return 0;      case TextureFormat::G8R24:          if (component == 0) {              return 8; @@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {          return (component == 0 || component == 1) ? 8 : 0;      case TextureFormat::G4R4:          return (component == 0 || component == 1) ? 4 : 0; -    default: -        UNIMPLEMENTED_MSG("texture format not implement={}", format); -        return 0;      } +    UNIMPLEMENTED_MSG("Texture format not implemented={}", format); +    return 0;  }  std::size_t GetImageComponentMask(TextureFormat format) { @@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {      case TextureFormat::R32_B24G8:      case TextureFormat::B5G6R5:      case TextureFormat::B6G5R5: +    case TextureFormat::BF10GF11RF11:          return std::size_t{R | G | B};      case TextureFormat::R32_G32:      case TextureFormat::R16_G16: @@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {      case TextureFormat::R8:      case TextureFormat::R1:          return std::size_t{R}; -    default: -        UNIMPLEMENTED_MSG("texture format not implement={}", format); -        return std::size_t{R | G | B | A};      } +    UNIMPLEMENTED_MSG("Texture format not implemented={}", format); +    return std::size_t{R | G | B | A};  }  std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { @@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,              return {std::move(original_value), true};          }      default: -        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); +        
UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);          return {std::move(original_value), true};      }  } @@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {              default:                  break;              } -            UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", +            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",                                static_cast<u64>(instr.suatom_d.operation.Value()),                                static_cast<u64>(instr.suatom_d.operation_type.Value()));              return OperationCode::AtomicImageAdd; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 94d3a6ae5..0caf3b4f0 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(      }      const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};      const auto layer{static_cast<u32>(relative_address / layer_size)}; +    if (layer >= params.depth) { +        return {}; +    }      const GPUVAddr mipmap_address = relative_address - layer_size * layer;      const auto mipmap_it =          Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b543fc8c0..6207d8dfe 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -24,6 +24,7 @@  #include "core/core.h"  #include "core/memory.h"  #include "core/settings.h" +#include "video_core/compatible_formats.h"  #include "video_core/dirty_flags.h"  #include "video_core/engines/fermi_2d.h"  #include "video_core/engines/maxwell_3d.h" @@ -47,8 +48,8 @@ class RasterizerInterface;  namespace VideoCommon { +using 
VideoCore::Surface::FormatCompatibility;  using VideoCore::Surface::PixelFormat; -  using VideoCore::Surface::SurfaceTarget;  using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -595,7 +596,7 @@ private:          } else {              new_surface = GetUncachedSurface(gpu_addr, params);          } -        const auto& final_params = new_surface->GetSurfaceParams(); +        const SurfaceParams& final_params = new_surface->GetSurfaceParams();          if (cr_params.type != final_params.type) {              if (Settings::IsGPULevelExtreme()) {                  BufferCopy(current_surface, new_surface); @@ -603,7 +604,7 @@ private:          } else {              std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);              for (auto& brick : bricks) { -                ImageCopy(current_surface, new_surface, brick); +                TryCopyImage(current_surface, new_surface, brick);              }          }          Unregister(current_surface); @@ -694,7 +695,7 @@ private:                  }                  const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,                                               src_params.depth); -                ImageCopy(surface, new_surface, copy_params); +                TryCopyImage(surface, new_surface, copy_params);              }          }          if (passed_tests == 0) { @@ -791,7 +792,7 @@ private:              const u32 width = params.width;              const u32 height = params.height;              const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); -            ImageCopy(surface, new_surface, copy_params); +            TryCopyImage(surface, new_surface, copy_params);          }          for (const auto& surface : overlaps) {              Unregister(surface); @@ -1053,7 +1054,7 @@ private:      void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,                          const GPUVAddr src_gpu_addr, 
const GPUVAddr dst_gpu_addr) {          auto deduced_src = DeduceSurface(src_gpu_addr, src_params); -        auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); +        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);          if (deduced_src.Failed() || deduced_dst.Failed()) {              return;          } @@ -1192,6 +1193,19 @@ private:          return {};      } +    /// Try to do an image copy logging when formats are incompatible. +    void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { +        const SurfaceParams& src_params = src->GetSurfaceParams(); +        const SurfaceParams& dst_params = dst->GetSurfaceParams(); +        if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { +            LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", +                      static_cast<int>(dst_params.pixel_format), +                      static_cast<int>(src_params.pixel_format)); +            return; +        } +        ImageCopy(src, dst, copy); +    } +      constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {          return siblings_table[static_cast<std::size_t>(format)];      } @@ -1241,6 +1255,7 @@ private:      VideoCore::RasterizerInterface& rasterizer;      FormatLookupTable format_lookup_table; +    FormatCompatibility format_compatibility;      u64 ticks{}; diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 696da2137..4bfce48a4 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -44,49 +44,65 @@ EmuThread::EmuThread() = default;  EmuThread::~EmuThread() = default;  void EmuThread::run() { -    MicroProfileOnThreadCreate("EmuThread"); +    std::string name = "yuzu:EmuControlThread"; +    MicroProfileOnThreadCreate(name.c_str()); +    Common::SetCurrentThreadName(name.c_str()); + +    auto& system = Core::System::GetInstance(); + +    system.RegisterHostThread(); + +    auto& gpu = system.GPU();      // Main process 
has been loaded. Make the context current to this thread and begin GPU and CPU      // execution. -    Core::System::GetInstance().GPU().Start(); +    gpu.Start(); + +    gpu.ObtainContext();      emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); -    Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( +    system.Renderer().Rasterizer().LoadDiskResources(          stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {              emit LoadProgress(stage, value, total);          });      emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); +    gpu.ReleaseContext(); +      // Holds whether the cpu was running during the last iteration,      // so that the DebugModeLeft signal can be emitted before the      // next execution step      bool was_active = false;      while (!stop_run) {          if (running) { -            if (!was_active) +            if (was_active) {                  emit DebugModeLeft(); +            } -            Core::System::ResultStatus result = Core::System::GetInstance().RunLoop(); +            running_guard = true; +            Core::System::ResultStatus result = system.Run();              if (result != Core::System::ResultStatus::Success) { +                running_guard = false;                  this->SetRunning(false); -                emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails()); +                emit ErrorThrown(result, system.GetStatusDetails());              } +            running_wait.Wait(); +            result = system.Pause(); +            if (result != Core::System::ResultStatus::Success) { +                running_guard = false; +                this->SetRunning(false); +                emit ErrorThrown(result, system.GetStatusDetails()); +            } +            running_guard = false; -            was_active = running || exec_step; -            if (!was_active && !stop_run) +            if (!stop_run) { +          
      was_active = true;                  emit DebugModeEntered(); +            }          } else if (exec_step) { -            if (!was_active) -                emit DebugModeLeft(); - -            exec_step = false; -            Core::System::GetInstance().SingleStep(); -            emit DebugModeEntered(); -            yieldCurrentThread(); - -            was_active = false; +            UNIMPLEMENTED();          } else {              std::unique_lock lock{running_mutex};              running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; }); @@ -94,7 +110,7 @@ void EmuThread::run() {      }      // Shutdown the core emulation -    Core::System::GetInstance().Shutdown(); +    system.Shutdown();  #if MICROPROFILE_ENABLED      MicroProfileOnThreadExit(); @@ -360,7 +376,7 @@ QByteArray GRenderWindow::saveGeometry() {  }  qreal GRenderWindow::windowPixelRatio() const { -    return devicePixelRatio(); +    return devicePixelRatioF();  }  std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const { diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 3626604ca..6c59b4d5c 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h @@ -59,6 +59,12 @@ public:          this->running = running;          lock.unlock();          running_cv.notify_all(); +        if (!running) { +            running_wait.Set(); +            /// Wait until effectively paused +            while (running_guard) +                ; +        }      }      /** @@ -84,6 +90,8 @@ private:      std::atomic_bool stop_run{false};      std::mutex running_mutex;      std::condition_variable running_cv; +    Common::Event running_wait{}; +    std::atomic_bool running_guard{false};  signals:      /** diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 32c81dc70..bbbd96113 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -211,7 +211,7 @@ const std::array<int, 
Settings::NativeKeyboard::NumKeyboardMods> Config::default  // This must be in alphabetical order according to action name as it must have the same order as  // UISetting::values.shortcuts, which is alphabetically ordered.  // clang-format off -const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{ +const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{      {QStringLiteral("Capture Screenshot"),       QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}},      {QStringLiteral("Change Docked Mode"),       QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},      {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, @@ -222,6 +222,7 @@ const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{      {QStringLiteral("Increase Speed Limit"),     QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}},      {QStringLiteral("Load Amiibo"),              QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}},      {QStringLiteral("Load File"),                QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}}, +    {QStringLiteral("Mute Audio"),               QStringLiteral("Main Window"), {QStringLiteral("Ctrl+M"), Qt::WindowShortcut}},      {QStringLiteral("Restart Emulation"),        QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},      {QStringLiteral("Stop Emulation"),           QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},      {QStringLiteral("Toggle Filter Bar"),        QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 5cd2a5feb..09316382c 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -27,7 +27,7 @@ public:          
default_mouse_buttons;      static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;      static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; -    static const std::array<UISettings::Shortcut, 15> default_hotkeys; +    static const std::array<UISettings::Shortcut, 16> default_hotkeys;  private:      void ReadValues(); diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index cb95423e0..74b2ad537 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp @@ -23,6 +23,11 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)  ConfigureGeneral::~ConfigureGeneral() = default;  void ConfigureGeneral::SetConfiguration() { +    const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + +    ui->use_multi_core->setEnabled(runtime_lock); +    ui->use_multi_core->setChecked(Settings::values.use_multi_core); +      ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);      ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);      ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background); @@ -41,6 +46,7 @@ void ConfigureGeneral::ApplyConfiguration() {      Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();      Settings::values.frame_limit = ui->frame_limit->value(); +    Settings::values.use_multi_core = ui->use_multi_core->isChecked();  }  void ConfigureGeneral::changeEvent(QEvent* event) { diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index fc3b7e65a..2711116a2 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui @@ -52,6 +52,13 @@             </layout>            </item>            <item> +           <widget class="QCheckBox" name="use_multi_core"> +            <property 
name="text"> +             <string>Multicore CPU Emulation</string> +            </property> +           </widget> +          </item> +          <item>             <widget class="QCheckBox" name="toggle_check_exit">              <property name="text">               <string>Confirm exit while emulation is running</string> diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index c1ea25fb8..9bb0a0109 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -2,10 +2,13 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <fmt/format.h> +  #include "yuzu/debugger/wait_tree.h"  #include "yuzu/util/util.h"  #include "common/assert.h" +#include "core/arm/arm_interface.h"  #include "core/core.h"  #include "core/hle/kernel/handle_table.h"  #include "core/hle/kernel/mutex.h" @@ -59,8 +62,10 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()      std::size_t row = 0;      auto add_threads = [&](const std::vector<std::shared_ptr<Kernel::Thread>>& threads) {          for (std::size_t i = 0; i < threads.size(); ++i) { -            item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i])); -            item_list.back()->row = row; +            if (!threads[i]->IsHLEThread()) { +                item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i])); +                item_list.back()->row = row; +            }              ++row;          }      }; @@ -114,20 +119,21 @@ QString WaitTreeCallstack::GetText() const {  std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {      std::vector<std::unique_ptr<WaitTreeItem>> list; -    constexpr std::size_t BaseRegister = 29; -    auto& memory = Core::System::GetInstance().Memory(); -    u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister]; +    if (thread.IsHLEThread()) { +        return list; +    } -    while (base_pointer != 0) { -      
  const u64 lr = memory.Read64(base_pointer + sizeof(u64)); -        if (lr == 0) { -            break; -        } +    if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) { +        return list; +    } -        list.push_back(std::make_unique<WaitTreeText>( -            tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'}))); +    auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(), +                                                                  thread.GetContext64()); -        base_pointer = memory.Read64(base_pointer); +    for (auto& entry : backtrace) { +        std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address, +                                    entry.original_address, entry.offset, entry.name); +        list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s)));      }      return list; @@ -206,7 +212,15 @@ QString WaitTreeThread::GetText() const {          status = tr("running");          break;      case Kernel::ThreadStatus::Ready: -        status = tr("ready"); +        if (!thread.IsPaused()) { +            if (thread.WasRunning()) { +                status = tr("running"); +            } else { +                status = tr("ready"); +            } +        } else { +            status = tr("paused"); +        }          break;      case Kernel::ThreadStatus::Paused:          status = tr("paused"); @@ -254,7 +268,15 @@ QColor WaitTreeThread::GetColor() const {      case Kernel::ThreadStatus::Running:          return QColor(Qt::GlobalColor::darkGreen);      case Kernel::ThreadStatus::Ready: -        return QColor(Qt::GlobalColor::darkBlue); +        if (!thread.IsPaused()) { +            if (thread.WasRunning()) { +                return QColor(Qt::GlobalColor::darkGreen); +            } else { +                return QColor(Qt::GlobalColor::darkBlue); +            } +        } else { +            return 
QColor(Qt::GlobalColor::lightGray); +        }      case Kernel::ThreadStatus::Paused:          return QColor(Qt::GlobalColor::lightGray);      case Kernel::ThreadStatus::WaitHLEEvent: @@ -319,7 +341,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {      if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {          list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(), -                                                            thread.IsSleepingOnWait())); +                                                            thread.IsWaitingSync()));      }      list.push_back(std::make_unique<WaitTreeCallstack>(thread)); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 4119d7907..fb299a39b 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual  #include <QShortcut>  #include <QStatusBar>  #include <QSysInfo> +#include <QUrl>  #include <QtConcurrent/QtConcurrent>  #include <fmt/format.h> @@ -217,7 +218,20 @@ GMainWindow::GMainWindow()      LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,               Common::g_scm_desc);  #ifdef ARCHITECTURE_x86_64 -    LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); +    const auto& caps = Common::GetCPUCaps(); +    std::string cpu_string = caps.cpu_string; +    if (caps.avx || caps.avx2 || caps.avx512) { +        cpu_string += " | AVX"; +        if (caps.avx512) { +            cpu_string += "512"; +        } else if (caps.avx2) { +            cpu_string += '2'; +        } +        if (caps.fma || caps.fma4) { +            cpu_string += " | FMA"; +        } +    } +    LOG_INFO(Frontend, "Host CPU: {}", cpu_string);  #endif      LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());      LOG_INFO(Frontend, "Host RAM: {:.2f} GB", @@ -520,14 +534,36 @@ void 
GMainWindow::InitializeWidgets() {          if (emulation_running) {              return;          } -        Settings::values.use_asynchronous_gpu_emulation = -            !Settings::values.use_asynchronous_gpu_emulation; +        bool is_async = +            !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; +        Settings::values.use_asynchronous_gpu_emulation = is_async;          async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);          Settings::Apply();      });      async_status_button->setText(tr("ASYNC"));      async_status_button->setCheckable(true);      async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); + +    // Setup Multicore button +    multicore_status_button = new QPushButton(); +    multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton")); +    multicore_status_button->setFocusPolicy(Qt::NoFocus); +    connect(multicore_status_button, &QPushButton::clicked, [&] { +        if (emulation_running) { +            return; +        } +        Settings::values.use_multi_core = !Settings::values.use_multi_core; +        bool is_async = +            Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core; +        Settings::values.use_asynchronous_gpu_emulation = is_async; +        async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); +        multicore_status_button->setChecked(Settings::values.use_multi_core); +        Settings::Apply(); +    }); +    multicore_status_button->setText(tr("MULTICORE")); +    multicore_status_button->setCheckable(true); +    multicore_status_button->setChecked(Settings::values.use_multi_core); +    statusBar()->insertPermanentWidget(0, multicore_status_button);      statusBar()->insertPermanentWidget(0, async_status_button);      // Setup Renderer API button @@ -653,6 +689,11 @@ void GMainWindow::InitializeHotkeys() {      
ui.action_Capture_Screenshot->setShortcutContext(          hotkey_registry.GetShortcutContext(main_window, capture_screenshot)); +    ui.action_Fullscreen->setShortcut( +        hotkey_registry.GetHotkey(main_window, fullscreen, this)->key()); +    ui.action_Fullscreen->setShortcutContext( +        hotkey_registry.GetShortcutContext(main_window, fullscreen)); +      connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),              &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);      connect( @@ -723,6 +764,9 @@ void GMainWindow::InitializeHotkeys() {                                      Settings::values.use_docked_mode);                  dock_status_button->setChecked(Settings::values.use_docked_mode);              }); +    connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), +            &QShortcut::activated, this, +            [] { Settings::values.audio_muted = !Settings::values.audio_muted; });  }  void GMainWindow::SetDefaultUIGeometry() { @@ -823,6 +867,10 @@ void GMainWindow::ConnectMenuEvents() {      connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);      connect(ui.action_Report_Compatibility, &QAction::triggered, this,              &GMainWindow::OnMenuReportCompatibility); +    connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage); +    connect(ui.action_Open_Quickstart_Guide, &QAction::triggered, this, +            &GMainWindow::OnOpenQuickstartGuide); +    connect(ui.action_Open_FAQ, &QAction::triggered, this, &GMainWindow::OnOpenFAQ);      connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); });      connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure); @@ -836,10 +884,6 @@ void GMainWindow::ConnectMenuEvents() {      connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize);      // Fullscreen -    
ui.action_Fullscreen->setShortcut( -        hotkey_registry -            .GetHotkey(QStringLiteral("Main Window"), QStringLiteral("Fullscreen"), this) -            ->key());      connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen);      // Movie @@ -907,6 +951,8 @@ bool GMainWindow::LoadROM(const QString& filename) {          nullptr,                                     // E-Commerce      }); +    system.RegisterHostThread(); +      const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())};      const auto drd_callout = @@ -1023,6 +1069,7 @@ void GMainWindow::BootGame(const QString& filename) {      }      status_bar_update_timer.start(2000);      async_status_button->setDisabled(true); +    multicore_status_button->setDisabled(true);      renderer_status_button->setDisabled(true);      if (UISettings::values.hide_mouse) { @@ -1034,17 +1081,19 @@ void GMainWindow::BootGame(const QString& filename) {      const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();      std::string title_name; +    std::string title_version;      const auto res = Core::System::GetInstance().GetGameName(title_name); -    if (res != Loader::ResultStatus::Success) { -        const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata(); -        if (metadata.first != nullptr) -            title_name = metadata.first->GetApplicationName(); -        if (title_name.empty()) -            title_name = FileUtil::GetFilename(filename.toStdString()); +    const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata(); +    if (metadata.first != nullptr) { +        title_version = metadata.first->GetVersionString(); +        title_name = metadata.first->GetApplicationName(); +    } +    if (res != Loader::ResultStatus::Success || title_name.empty()) { +        title_name = FileUtil::GetFilename(filename.toStdString());      } -    LOG_INFO(Frontend, "Booting game: {:016X} | {}", 
title_id, title_name); -    UpdateWindowTitle(QString::fromStdString(title_name)); +    LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version); +    UpdateWindowTitle(title_name, title_version);      loading_screen->Prepare(Core::System::GetInstance().GetAppLoader());      loading_screen->show(); @@ -1110,6 +1159,7 @@ void GMainWindow::ShutdownGame() {      game_fps_label->setVisible(false);      emu_frametime_label->setVisible(false);      async_status_button->setEnabled(true); +    multicore_status_button->setEnabled(true);  #ifdef HAS_VULKAN      renderer_status_button->setEnabled(true);  #endif @@ -1794,6 +1844,26 @@ void GMainWindow::OnMenuReportCompatibility() {      }  } +void GMainWindow::OpenURL(const QUrl& url) { +    const bool open = QDesktopServices::openUrl(url); +    if (!open) { +        QMessageBox::warning(this, tr("Error opening URL"), +                             tr("Unable to open the URL \"%1\".").arg(url.toString())); +    } +} + +void GMainWindow::OnOpenModsPage() { +    OpenURL(QUrl(QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods"))); +} + +void GMainWindow::OnOpenQuickstartGuide() { +    OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/help/quickstart/"))); +} + +void GMainWindow::OnOpenFAQ() { +    OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/wiki/faq/"))); +} +  void GMainWindow::ToggleFullscreen() {      if (!emulation_running) {          return; @@ -1905,7 +1975,11 @@ void GMainWindow::OnConfigure() {      }      dock_status_button->setChecked(Settings::values.use_docked_mode); +    multicore_status_button->setChecked(Settings::values.use_multi_core); +    Settings::values.use_asynchronous_gpu_emulation = +        Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;      async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation); +  #ifdef HAS_VULKAN      renderer_status_button->setChecked(Settings::values.renderer_backend 
==                                         Settings::RendererBackend::Vulkan); @@ -1992,7 +2066,8 @@ void GMainWindow::OnCaptureScreenshot() {      OnStartGame();  } -void GMainWindow::UpdateWindowTitle(const QString& title_name) { +void GMainWindow::UpdateWindowTitle(const std::string& title_name, +                                    const std::string& title_version) {      const auto full_name = std::string(Common::g_build_fullname);      const auto branch_name = std::string(Common::g_scm_branch);      const auto description = std::string(Common::g_scm_desc); @@ -2001,7 +2076,7 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) {      const auto date =          QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd")).toStdString(); -    if (title_name.isEmpty()) { +    if (title_name.empty()) {          const auto fmt = std::string(Common::g_title_bar_format_idle);          setWindowTitle(QString::fromStdString(fmt::format(fmt.empty() ? "yuzu {0}| {1}-{2}" : fmt,                                                            full_name, branch_name, description, @@ -2009,8 +2084,8 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) {      } else {          const auto fmt = std::string(Common::g_title_bar_format_running);          setWindowTitle(QString::fromStdString( -            fmt::format(fmt.empty() ? "yuzu {0}| {3} | {1}-{2}" : fmt, full_name, branch_name, -                        description, title_name.toStdString(), date, build_id))); +            fmt::format(fmt.empty() ? 
"yuzu {0}| {3} | {6} | {1}-{2}" : fmt, full_name, branch_name, +                        description, title_name, date, build_id, title_version)));      }  } @@ -2032,7 +2107,7 @@ void GMainWindow::UpdateStatusBar() {      game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0));      emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2)); -    emu_speed_label->setVisible(true); +    emu_speed_label->setVisible(!Settings::values.use_multi_core);      game_fps_label->setVisible(true);      emu_frametime_label->setVisible(true);  } @@ -2151,7 +2226,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {                           "title.keys_autogenerated");      } -    Core::Crypto::KeyManager keys{}; +    Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();      if (keys.BaseDeriveNecessary()) {          Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory(              FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir), FileSys::Mode::Read)}; diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 4f4c8ddbe..66c84e5c0 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -181,6 +181,9 @@ private slots:      void OnPauseGame();      void OnStopGame();      void OnMenuReportCompatibility(); +    void OnOpenModsPage(); +    void OnOpenQuickstartGuide(); +    void OnOpenFAQ();      /// Called whenever a user selects a game in the game list widget.      
void OnGameListLoadFile(QString game_path);      void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path); @@ -215,10 +218,12 @@ private slots:  private:      std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); -    void UpdateWindowTitle(const QString& title_name = {}); +    void UpdateWindowTitle(const std::string& title_name = {}, +                           const std::string& title_version = {});      void UpdateStatusBar();      void HideMouseCursor();      void ShowMouseCursor(); +    void OpenURL(const QUrl& url);      Ui::MainWindow ui; @@ -234,6 +239,7 @@ private:      QLabel* game_fps_label = nullptr;      QLabel* emu_frametime_label = nullptr;      QPushButton* async_status_button = nullptr; +    QPushButton* multicore_status_button = nullptr;      QPushButton* renderer_status_button = nullptr;      QPushButton* dock_status_button = nullptr;      QTimer status_bar_update_timer; diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 97c90f50b..bee6e107e 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -113,6 +113,9 @@       <string>&Help</string>      </property>      <addaction name="action_Report_Compatibility"/> +    <addaction name="action_Open_Mods_Page"/> +    <addaction name="action_Open_Quickstart_Guide"/> +    <addaction name="action_Open_FAQ"/>      <addaction name="separator"/>      <addaction name="action_About"/>     </widget> @@ -256,6 +259,21 @@      <bool>false</bool>     </property>    </action> +  <action name="action_Open_Mods_Page"> +   <property name="text"> +    <string>Open Mods Page</string> +   </property> +  </action> +  <action name="action_Open_Quickstart_Guide"> +   <property name="text"> +    <string>Open Quickstart Guide</string> +   </property> +  </action> +  <action name="action_Open_FAQ"> +   <property name="text"> +    <string>FAQ</string> +   </property> +  </action>    <action name="action_Open_yuzu_Folder">     <property name="text">      
<string>Open yuzu Folder</string> diff --git a/src/yuzu/yuzu.rc b/src/yuzu/yuzu.rc index 1b253653f..4a3645a71 100644 --- a/src/yuzu/yuzu.rc +++ b/src/yuzu/yuzu.rc @@ -16,4 +16,4 @@ IDI_ICON1               ICON                    "../../dist/yuzu.ico"  // RT_MANIFEST  // -1                       RT_MANIFEST             "../../dist/yuzu.manifest" +0                       RT_MANIFEST             "../../dist/yuzu.manifest" diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 4d2ea7e9e..e6c6a839d 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -2,6 +2,7 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <chrono>  #include <iostream>  #include <memory>  #include <string> @@ -236,9 +237,11 @@ int main(int argc, char** argv) {      system.Renderer().Rasterizer().LoadDiskResources();      std::thread render_thread([&emu_window] { emu_window->Present(); }); +    system.Run();      while (emu_window->IsOpen()) { -        system.RunLoop(); +        std::this_thread::sleep_for(std::chrono::milliseconds(1));      } +    system.Pause();      render_thread.join();      system.Shutdown(); diff --git a/src/yuzu_cmd/yuzu.rc b/src/yuzu_cmd/yuzu.rc index 7de8ef3d9..0cde75e2f 100644 --- a/src/yuzu_cmd/yuzu.rc +++ b/src/yuzu_cmd/yuzu.rc @@ -14,4 +14,4 @@ YUZU_ICON               ICON                    "../../dist/yuzu.ico"  // RT_MANIFEST  // -1                       RT_MANIFEST             "../../dist/yuzu.manifest" +0                       RT_MANIFEST             "../../dist/yuzu.manifest" diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp index 85d3f436b..2d3f6e3a7 100644 --- a/src/yuzu_tester/service/yuzutest.cpp +++ b/src/yuzu_tester/service/yuzutest.cpp @@ -53,7 +53,7 @@ private:          IPC::ResponseBuilder rb{ctx, 3};          rb.Push(RESULT_SUCCESS); -        rb.Push<u32>(write_size); +        rb.Push<u32>(static_cast<u32>(write_size));      }      void 
StartIndividual(Kernel::HLERequestContext& ctx) { diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp index 676e70ebd..083667baf 100644 --- a/src/yuzu_tester/yuzu.cpp +++ b/src/yuzu_tester/yuzu.cpp @@ -2,6 +2,7 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include <chrono>  #include <iostream>  #include <memory>  #include <string> @@ -255,9 +256,11 @@ int main(int argc, char** argv) {      system.GPU().Start();      system.Renderer().Rasterizer().LoadDiskResources(); +    system.Run();      while (!finished) { -        system.RunLoop(); +        std::this_thread::sleep_for(std::chrono::milliseconds(1));      } +    system.Pause();      detached_tasks.WaitForAllTasks();      return return_value; diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc index 7de8ef3d9..0cde75e2f 100644 --- a/src/yuzu_tester/yuzu.rc +++ b/src/yuzu_tester/yuzu.rc @@ -14,4 +14,4 @@ YUZU_ICON               ICON                    "../../dist/yuzu.ico"  // RT_MANIFEST  // -1                       RT_MANIFEST             "../../dist/yuzu.manifest" +0                       RT_MANIFEST             "../../dist/yuzu.manifest"  | 
