diff options
44 files changed, 749 insertions, 331 deletions
| diff --git a/src/common/polyfill_thread.h b/src/common/polyfill_thread.h index 5a8d1ce08..b2c929d2f 100644 --- a/src/common/polyfill_thread.h +++ b/src/common/polyfill_thread.h @@ -11,6 +11,8 @@  #ifdef __cpp_lib_jthread +#include <chrono> +#include <condition_variable>  #include <stop_token>  #include <thread> @@ -21,11 +23,23 @@ void CondvarWait(Condvar& cv, Lock& lock, std::stop_token token, Pred&& pred) {      cv.wait(lock, token, std::move(pred));  } +template <typename Rep, typename Period> +bool StoppableTimedWait(std::stop_token token, const std::chrono::duration<Rep, Period>& rel_time) { +    std::condition_variable_any cv; +    std::mutex m; + +    // Perform the timed wait. +    std::unique_lock lk{m}; +    return !cv.wait_for(lk, token, rel_time, [&] { return token.stop_requested(); }); +} +  } // namespace Common  #else  #include <atomic> +#include <chrono> +#include <condition_variable>  #include <functional>  #include <list>  #include <memory> @@ -318,6 +332,28 @@ void CondvarWait(Condvar& cv, Lock& lock, std::stop_token token, Pred pred) {      cv.wait(lock, [&] { return pred() || token.stop_requested(); });  } +template <typename Rep, typename Period> +bool StoppableTimedWait(std::stop_token token, const std::chrono::duration<Rep, Period>& rel_time) { +    if (token.stop_requested()) { +        return false; +    } + +    bool stop_requested = false; +    std::condition_variable cv; +    std::mutex m; + +    std::stop_callback cb(token, [&] { +        // Wake up the waiting thread. +        std::unique_lock lk{m}; +        stop_requested = true; +        cv.notify_one(); +    }); + +    // Perform the timed wait. 
+    std::unique_lock lk{m}; +    return !cv.wait_for(lk, rel_time, [&] { return stop_requested; }); +} +  } // namespace Common  #endif diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp index 124149697..0c6b20db3 100644 --- a/src/core/hle/kernel/k_condition_variable.cpp +++ b/src/core/hle/kernel/k_condition_variable.cpp @@ -171,7 +171,7 @@ Result KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 value)          R_UNLESS(owner_thread != nullptr, ResultInvalidHandle);          // Update the lock. -        cur_thread->SetAddressKey(addr, value); +        cur_thread->SetUserAddressKey(addr, value);          owner_thread->AddWaiter(cur_thread);          // Begin waiting. diff --git a/src/core/hle/kernel/k_light_lock.cpp b/src/core/hle/kernel/k_light_lock.cpp index 43185320d..d791acbe3 100644 --- a/src/core/hle/kernel/k_light_lock.cpp +++ b/src/core/hle/kernel/k_light_lock.cpp @@ -68,7 +68,7 @@ bool KLightLock::LockSlowPath(uintptr_t _owner, uintptr_t _cur_thread) {          // Add the current thread as a waiter on the owner.          KThread* owner_thread = reinterpret_cast<KThread*>(_owner & ~1ULL); -        cur_thread->SetAddressKey(reinterpret_cast<uintptr_t>(std::addressof(tag))); +        cur_thread->SetKernelAddressKey(reinterpret_cast<uintptr_t>(std::addressof(tag)));          owner_thread->AddWaiter(cur_thread);          // Begin waiting to hold the lock. 
diff --git a/src/core/hle/kernel/k_memory_layout.h b/src/core/hle/kernel/k_memory_layout.h index fd6e1d3e6..17fa1a6ed 100644 --- a/src/core/hle/kernel/k_memory_layout.h +++ b/src/core/hle/kernel/k_memory_layout.h @@ -67,9 +67,9 @@ constexpr size_t KernelPageBufferAdditionalSize = 0x33C000;  constexpr std::size_t KernelResourceSize = KernelPageTableHeapSize + KernelInitialPageHeapSize +                                             KernelSlabHeapSize + KernelPageBufferHeapSize; -constexpr bool IsKernelAddressKey(VAddr key) { -    return KernelVirtualAddressSpaceBase <= key && key <= KernelVirtualAddressSpaceLast; -} +//! NB: Use KThread::GetAddressKeyIsKernel(). +//! See explanation for deviation of GetAddressKey. +bool IsKernelAddressKey(VAddr key) = delete;  constexpr bool IsKernelAddress(VAddr address) {      return KernelVirtualAddressSpaceBase <= address && address < KernelVirtualAddressSpaceEnd; diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp index 21207fe99..84ff3c64b 100644 --- a/src/core/hle/kernel/k_thread.cpp +++ b/src/core/hle/kernel/k_thread.cpp @@ -330,7 +330,7 @@ void KThread::Finalize() {              KThread* const waiter = std::addressof(*it);              // The thread shouldn't be a kernel waiter. -            ASSERT(!IsKernelAddressKey(waiter->GetAddressKey())); +            ASSERT(!waiter->GetAddressKeyIsKernel());              // Clear the lock owner.              waiter->SetLockOwner(nullptr); @@ -763,19 +763,6 @@ void KThread::Continue() {      KScheduler::OnThreadStateChanged(kernel, this, old_state);  } -void KThread::WaitUntilSuspended() { -    // Make sure we have a suspend requested. -    ASSERT(IsSuspendRequested()); - -    // Loop until the thread is not executing on any core. 
-    for (std::size_t i = 0; i < static_cast<std::size_t>(Core::Hardware::NUM_CPU_CORES); ++i) { -        KThread* core_thread{}; -        do { -            core_thread = kernel.Scheduler(i).GetSchedulerCurrentThread(); -        } while (core_thread == this); -    } -} -  Result KThread::SetActivity(Svc::ThreadActivity activity) {      // Lock ourselves.      KScopedLightLock lk(activity_pause_lock); @@ -897,7 +884,7 @@ void KThread::AddWaiterImpl(KThread* thread) {      }      // Keep track of how many kernel waiters we have. -    if (IsKernelAddressKey(thread->GetAddressKey())) { +    if (thread->GetAddressKeyIsKernel()) {          ASSERT((num_kernel_waiters++) >= 0);          KScheduler::SetSchedulerUpdateNeeded(kernel);      } @@ -911,7 +898,7 @@ void KThread::RemoveWaiterImpl(KThread* thread) {      ASSERT(kernel.GlobalSchedulerContext().IsLocked());      // Keep track of how many kernel waiters we have. -    if (IsKernelAddressKey(thread->GetAddressKey())) { +    if (thread->GetAddressKeyIsKernel()) {          ASSERT((num_kernel_waiters--) > 0);          KScheduler::SetSchedulerUpdateNeeded(kernel);      } @@ -987,7 +974,7 @@ KThread* KThread::RemoveWaiterByKey(s32* out_num_waiters, VAddr key) {              KThread* thread = std::addressof(*it);              // Keep track of how many kernel waiters we have. 
-            if (IsKernelAddressKey(thread->GetAddressKey())) { +            if (thread->GetAddressKeyIsKernel()) {                  ASSERT((num_kernel_waiters--) > 0);                  KScheduler::SetSchedulerUpdateNeeded(kernel);              } diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index 7cd94a340..9d771de0e 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h @@ -214,8 +214,6 @@ public:      void Continue(); -    void WaitUntilSuspended(); -      constexpr void SetSyncedIndex(s32 index) {          synced_index = index;      } @@ -607,13 +605,30 @@ public:          return address_key_value;      } -    void SetAddressKey(VAddr key) { +    [[nodiscard]] bool GetAddressKeyIsKernel() const { +        return address_key_is_kernel; +    } + +    //! NB: intentional deviation from official kernel. +    // +    // Separate SetAddressKey into user and kernel versions +    // to cope with arbitrary host pointers making their way +    // into things. 
+ +    void SetUserAddressKey(VAddr key) {          address_key = key; +        address_key_is_kernel = false;      } -    void SetAddressKey(VAddr key, u32 val) { +    void SetUserAddressKey(VAddr key, u32 val) {          address_key = key;          address_key_value = val; +        address_key_is_kernel = false; +    } + +    void SetKernelAddressKey(VAddr key) { +        address_key = key; +        address_key_is_kernel = true;      }      void ClearWaitQueue() { @@ -772,6 +787,7 @@ private:      bool debug_attached{};      s8 priority_inheritance_count{};      bool resource_limit_release_hint{}; +    bool address_key_is_kernel{};      StackParameters stack_parameters{};      Common::SpinLock context_guard{}; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1fb25f221..d9eafe261 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -1198,28 +1198,35 @@ void KernelCore::Suspend(bool suspended) {      const bool should_suspend{exception_exited || suspended};      const auto activity = should_suspend ? ProcessActivity::Paused : ProcessActivity::Runnable; -    std::vector<KScopedAutoObject<KThread>> process_threads; -    { -        KScopedSchedulerLock sl{*this}; +    //! This refers to the application process, not the current process. +    KScopedAutoObject<KProcess> process = CurrentProcess(); +    if (process.IsNull()) { +        return; +    } -        if (auto* process = CurrentProcess(); process != nullptr) { -            process->SetActivity(activity); +    // Set the new activity. +    process->SetActivity(activity); -            if (!should_suspend) { -                // Runnable now; no need to wait. -                return; -            } +    // Wait for process execution to stop. +    bool must_wait{should_suspend}; + +    // KernelCore::Suspend must be called from locked context, or we +    // could race another call to SetActivity, interfering with waiting. 
+    while (must_wait) { +        KScopedSchedulerLock sl{*this}; + +        // Assume that all threads have finished running. +        must_wait = false; -            for (auto* thread : process->GetThreadList()) { -                process_threads.emplace_back(thread); +        for (auto i = 0; i < static_cast<s32>(Core::Hardware::NUM_CPU_CORES); ++i) { +            if (Scheduler(i).GetSchedulerCurrentThread()->GetOwnerProcess() == +                process.GetPointerUnsafe()) { +                // A thread has not finished running yet. +                // Continue waiting. +                must_wait = true;              }          }      } - -    // Wait for execution to stop. -    for (auto& thread : process_threads) { -        thread->WaitUntilSuspended(); -    }  }  void KernelCore::ShutdownCores() { diff --git a/src/input_common/drivers/gc_adapter.cpp b/src/input_common/drivers/gc_adapter.cpp index ecb3e9dc2..d09ff178b 100644 --- a/src/input_common/drivers/gc_adapter.cpp +++ b/src/input_common/drivers/gc_adapter.cpp @@ -6,6 +6,7 @@  #include "common/logging/log.h"  #include "common/param_package.h" +#include "common/polyfill_thread.h"  #include "common/settings_input.h"  #include "common/thread.h"  #include "input_common/drivers/gc_adapter.h" @@ -217,8 +218,7 @@ void GCAdapter::AdapterScanThread(std::stop_token stop_token) {      Common::SetCurrentThreadName("ScanGCAdapter");      usb_adapter_handle = nullptr;      pads = {}; -    while (!stop_token.stop_requested() && !Setup()) { -        std::this_thread::sleep_for(std::chrono::seconds(2)); +    while (!Setup() && Common::StoppableTimedWait(stop_token, std::chrono::seconds{2})) {      }  } diff --git a/src/input_common/drivers/joycon.cpp b/src/input_common/drivers/joycon.cpp index 40cda400d..cedc94e63 100644 --- a/src/input_common/drivers/joycon.cpp +++ b/src/input_common/drivers/joycon.cpp @@ -5,6 +5,7 @@  #include "common/param_package.h"  #include "common/polyfill_ranges.h" +#include 
"common/polyfill_thread.h"  #include "common/settings.h"  #include "common/thread.h"  #include "input_common/drivers/joycon.h" @@ -67,7 +68,8 @@ void Joycons::Setup() {  void Joycons::ScanThread(std::stop_token stop_token) {      constexpr u16 nintendo_vendor_id = 0x057e;      Common::SetCurrentThreadName("JoyconScanThread"); -    while (!stop_token.stop_requested()) { + +    do {          SDL_hid_device_info* devs = SDL_hid_enumerate(nintendo_vendor_id, 0x0);          SDL_hid_device_info* cur_dev = devs; @@ -81,8 +83,7 @@ void Joycons::ScanThread(std::stop_token stop_token) {          }          SDL_hid_free_enumeration(devs); -        std::this_thread::sleep_for(std::chrono::seconds(5)); -    } +    } while (Common::StoppableTimedWait(stop_token, std::chrono::seconds{5}));  }  bool Joycons::IsDeviceNew(SDL_hid_device_info* device_info) const { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0cb1e193e..fd4a61a4d 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile,      header += "OPTION NV_internal;"                "OPTION NV_shader_storage_buffer;"                "OPTION NV_gpu_program_fp64;"; +    // TODO: Enable only when MS is used +    header += "OPTION NV_texture_multisample;";      if (info.uses_int64_bit_atomics) {          header += "OPTION NV_shader_atomic_int64;";      } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 5bfdecc09..2fc2a0ac6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {      Alias(inst, 
value);  } -void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { -    Alias(inst, value); -} -  void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {      Alias(inst, value);  } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index e67e80fac..cf6065208 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -59,7 +59,7 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info,      }  } -std::string_view TextureType(IR::TextureInstInfo info) { +std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) {      if (info.is_depth) {          switch (info.type) {          case TextureType::Color1D: @@ -88,9 +88,9 @@ std::string_view TextureType(IR::TextureInstInfo info) {              return "ARRAY1D";          case TextureType::Color2D:          case TextureType::Color2DRect: -            return "2D"; +            return is_ms ? "2DMS" : "2D";          case TextureType::ColorArray2D: -            return "ARRAY2D"; +            return is_ms ? "ARRAY2DMS" : "ARRAY2D";          case TextureType::Color3D:              return "3D";          case TextureType::ColorCube: @@ -510,15 +510,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,                      const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {      const auto info{inst.Flags<IR::TextureInstInfo>()};      const auto sparse_inst{PrepareSparse(inst)}; +    const bool is_multisample{ms.type != Type::Void};      const std::string_view sparse_mod{sparse_inst ? 
".SPARSE" : ""}; -    const std::string_view type{TextureType(info)}; +    const std::string_view type{TextureType(info, is_multisample)};      const std::string texture{Texture(ctx, info, index)};      const std::string offset_vec{Offset(ctx, offset)};      const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};      const Register ret{ctx.reg_alloc.Define(inst)};      if (info.type == TextureType::Buffer) {          ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); -    } else if (ms.type != Type::Void) { +    } else if (is_multisample) {          ctx.Add("MOV.S {}.w,{};"                  "TXFMS.F{} {},{},{},{}{};",                  coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index eaaf9ba39..415a249e4 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist  void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);  void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);  void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);  void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);  void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);  void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index 8e5e6cf1f..1be4a0f59 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value)      ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);  } -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { -    ctx.AddF32("{}=ftoi({});", inst, value); -} -  void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {      NotImplemented();  } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index cecdbb9d6..d8874b0cc 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -414,7 +414,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde  void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,                      std::string_view coords, std::string_view offset, std::string_view lod, -                    [[maybe_unused]] std::string_view ms) { +                    std::string_view ms) {      const auto info{inst.Flags<IR::TextureInstInfo>()};      if (info.has_bias) {          throw NotImplementedException("EmitImageFetch Bias texture samples"); @@ -431,19 +431,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,          ctx.AddU1("{}=true;", *sparse_inst);      }      if (!sparse_inst || !supports_sparse) { -        if (!offset.empty()) { -            ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, -                    CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); +        const auto int_coords{CoordsCastToInt(coords, info)}; +        if (!ms.empty()) { +            ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms); +        } else if (!offset.empty()) { + 
           ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod, +                    CoordsCastToInt(offset, info));          } else {              if (info.type == TextureType::Buffer) {                  ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);              } else { -                ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, -                        CoordsCastToInt(coords, info), lod); +                ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod);              }          }          return;      } +    if (!ms.empty()) { +        throw NotImplementedException("EmitImageFetch Sparse MSAA samples"); +    }      if (!offset.empty()) {          ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",                    *sparse_inst, texture, CastToIntVec(coords, info), lod, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 4151c89de..c6df1dba7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,  void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);  void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);  void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);  void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);  void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);  void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 5d01ec0cd..1b006e811 
100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp @@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) {      }  } -std::string_view SamplerType(TextureType type, bool is_depth) { -    if (is_depth) { -        switch (type) { -        case TextureType::Color1D: -            return "sampler1DShadow"; -        case TextureType::ColorArray1D: -            return "sampler1DArrayShadow"; -        case TextureType::Color2D: -            return "sampler2DShadow"; -        case TextureType::ColorArray2D: -            return "sampler2DArrayShadow"; -        case TextureType::ColorCube: -            return "samplerCubeShadow"; -        case TextureType::ColorArrayCube: -            return "samplerCubeArrayShadow"; -        default: -            throw NotImplementedException("Texture type: {}", type); -        } +std::string_view DepthSamplerType(TextureType type) { +    switch (type) { +    case TextureType::Color1D: +        return "sampler1DShadow"; +    case TextureType::ColorArray1D: +        return "sampler1DArrayShadow"; +    case TextureType::Color2D: +        return "sampler2DShadow"; +    case TextureType::ColorArray2D: +        return "sampler2DArrayShadow"; +    case TextureType::ColorCube: +        return "samplerCubeShadow"; +    case TextureType::ColorArrayCube: +        return "samplerCubeArrayShadow"; +    default: +        throw NotImplementedException("Texture type: {}", type); +    } +} + +std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) { +    if (is_multisample) { +        ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D);      }      switch (type) {      case TextureType::Color1D: @@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) {          return "sampler1DArray";      case TextureType::Color2D:      case TextureType::Color2DRect: -        return "sampler2D"; +        
return is_multisample ? "sampler2DMS" : "sampler2D";      case TextureType::ColorArray2D: -        return "sampler2DArray"; +        return is_multisample ? "sampler2DMSArray" : "sampler2DArray";      case TextureType::Color3D:          return "sampler3D";      case TextureType::ColorCube: @@ -677,7 +681,7 @@ void EmitContext::SetupTextures(Bindings& bindings) {      texture_buffers.reserve(info.texture_buffer_descriptors.size());      for (const auto& desc : info.texture_buffer_descriptors) {          texture_buffers.push_back({bindings.texture, desc.count}); -        const auto sampler_type{SamplerType(TextureType::Buffer, false)}; +        const auto sampler_type{ColorSamplerType(TextureType::Buffer)};          const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};          header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,                                sampler_type, bindings.texture, array_decorator); @@ -686,7 +690,8 @@ void EmitContext::SetupTextures(Bindings& bindings) {      textures.reserve(info.texture_descriptors.size());      for (const auto& desc : info.texture_descriptors) {          textures.push_back({bindings.texture, desc.count}); -        const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; +        const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type) +                                              : ColorSamplerType(desc.type, desc.is_multisample)};          const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""};          header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,                                sampler_type, bindings.texture, array_decorator); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 50daacd95..c4ca28d11 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } -void EmitBitCastS32F32(EmitContext&) { -    throw NotImplementedException("SPIR-V Instruction"); -} -  void EmitBitCastF16U16(EmitContext&) {      throw NotImplementedException("SPIR-V Instruction");  } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e31cdc5e8..db12e8176 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);  void EmitBitCastU16F16(EmitContext& ctx);  Id EmitBitCastU32F32(EmitContext& ctx, Id value);  void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastS32F32(EmitContext& ctx);  void EmitBitCastF16U16(EmitContext&);  Id EmitBitCastF32U32(EmitContext& ctx, Id value);  void EmitBitCastF64U64(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index eb2e49a68..430797d23 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -704,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {  }  template <> -IR::S32 IREmitter::BitCast<IR::S32, 
IR::F32>(const IR::F32& value) { -    return Inst<IR::S32>(Opcode::BitCastS32F32, value); -} - -template <>  IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {      return Inst<IR::F32>(Opcode::BitCastF32U32, value);  } diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index d155afd0f..e300714f3 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -38,7 +38,6 @@ constexpr Type U8{Type::U8};  constexpr Type U16{Type::U16};  constexpr Type U32{Type::U32};  constexpr Type U64{Type::U64}; -constexpr Type S32{Type::S32};  constexpr Type F16{Type::F16};  constexpr Type F32{Type::F32};  constexpr Type F64{Type::F64}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1fe3749cc..24e82f802 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -175,7 +175,6 @@ OPCODE(SelectF64,                                           F64,            U1,  OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            )  OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            )  OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            ) -OPCODE(BitCastS32F32,                                       S32,            F32,                                                                            )  OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            )  OPCODE(BitCastF32U32,                                       F32,            U32,                                 
                                           )  OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            ) diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 5a7c706ad..04c8c4ddb 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -24,22 +24,21 @@ enum class Type {      U16 = 1 << 7,      U32 = 1 << 8,      U64 = 1 << 9, -    S32 = 1 << 10, -    F16 = 1 << 11, -    F32 = 1 << 12, -    F64 = 1 << 13, -    U32x2 = 1 << 14, -    U32x3 = 1 << 15, -    U32x4 = 1 << 16, -    F16x2 = 1 << 17, -    F16x3 = 1 << 18, -    F16x4 = 1 << 19, -    F32x2 = 1 << 20, -    F32x3 = 1 << 21, -    F32x4 = 1 << 22, -    F64x2 = 1 << 23, -    F64x3 = 1 << 24, -    F64x4 = 1 << 25, +    F16 = 1 << 10, +    F32 = 1 << 11, +    F64 = 1 << 12, +    U32x2 = 1 << 13, +    U32x3 = 1 << 14, +    U32x4 = 1 << 15, +    F16x2 = 1 << 16, +    F16x3 = 1 << 17, +    F16x4 = 1 << 18, +    F32x2 = 1 << 19, +    F32x3 = 1 << 20, +    F32x4 = 1 << 21, +    F64x2 = 1 << 22, +    F64x3 = 1 << 23, +    F64x4 = 1 << 24,  };  DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 30ba12316..346169328 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}  Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} -Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {} -  Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}  Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} @@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const {          return imm_u16 == other.imm_u16;      case Type::U32:      case Type::F32: -    
case Type::S32:          return imm_u32 == other.imm_u32;      case Type::U64:      case Type::F64: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 8b34356fd..883dfa24e 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -268,7 +268,6 @@ using U8 = TypedValue<Type::U8>;  using U16 = TypedValue<Type::U16>;  using U32 = TypedValue<Type::U32>;  using U64 = TypedValue<Type::U64>; -using S32 = TypedValue<Type::S32>;  using F16 = TypedValue<Type::F16>;  using F32 = TypedValue<Type::F32>;  using F64 = TypedValue<Type::F64>; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 9718c6921..70b620dcb 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -486,10 +486,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_      const IR::F32 w(ir.CompositeExtract(new_inst, 3));      const IR::F16F32F64 max_value(ir.Imm32(get_max_value()));      const IR::Value converted = -        ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value), -                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value), -                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value), -                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value)); +        ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value), +                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value), +                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value), +                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value));      inst.ReplaceUsesWith(converted);  }  } // Anonymous 
namespace diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b474eb363..4742bcbe9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(video_core STATIC      engines/puller.cpp      engines/puller.h      framebuffer_config.h +    fsr.cpp +    fsr.h      host1x/codecs/codec.cpp      host1x/codecs/codec.h      host1x/codecs/h264.cpp @@ -110,6 +112,8 @@ add_library(video_core STATIC      renderer_opengl/gl_device.h      renderer_opengl/gl_fence_manager.cpp      renderer_opengl/gl_fence_manager.h +    renderer_opengl/gl_fsr.cpp +    renderer_opengl/gl_fsr.h      renderer_opengl/gl_graphics_pipeline.cpp      renderer_opengl/gl_graphics_pipeline.h      renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/fsr.cpp b/src/video_core/fsr.cpp new file mode 100644 index 000000000..5653c64fc --- /dev/null +++ b/src/video_core/fsr.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <cmath> +#include "video_core/fsr.h" + +namespace FSR { +namespace { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. 
+u32 AU1_AH1_AF1(f32 f) { +    static constexpr u32 base[512]{ +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, +        0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, +        0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, +        0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, +        0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, +        0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, +        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, +        0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, +        0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, +        0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, +        0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, +    }; +    static constexpr s8 shift[512]{ +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, +        0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, +        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, +        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, +        0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, +        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, +        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, +        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, +        0x18, 0x18, +    }; +    const u32 u = Common::BitCast<u32>(f); +    const u32 i = u >> 23; +    return base[i] + ((u & 0x7fffff) >> shift[i]); +} + +u32 AU1_AH2_AF2(f32 a[2]) { +    return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); +} + +void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, +                f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, +                f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { +    con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); +    con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); +    con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); +    con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); +    con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); +    con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); +    con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); +    con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); +    con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); +    con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); +    con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); +    con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); +    con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); +    con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); +    con3[2] = con3[3] = 0; +} +} // Anonymous namespace + +void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], +                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, +                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, +                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { +    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, 
inputViewportInPixelsY, +               inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); +    con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + +                                   inputOffsetInPixelsX); +    con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + +                                   inputOffsetInPixelsY); +} + +void FsrRcasCon(u32* con, f32 sharpness) { +    sharpness = std::exp2f(-sharpness); +    f32 hSharp[2]{sharpness, sharpness}; +    con[0] = Common::BitCast<u32>(sharpness); +    con[1] = AU1_AH2_AF2(hSharp); +    con[2] = 0; +    con[3] = 0; +} +} // namespace FSR diff --git a/src/video_core/fsr.h b/src/video_core/fsr.h new file mode 100644 index 000000000..db0d4ec6f --- /dev/null +++ b/src/video_core/fsr.h @@ -0,0 +1,19 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/bit_cast.h" +#include "common/common_types.h" + +namespace FSR { +// Reimplementations of the constant generating functions in ffx_fsr1.h +// GCC generated a lot of warnings when using the official header. 
+void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], +                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, +                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, +                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY); + +void FsrRcasCon(u32* con, f32 sharpness); + +} // namespace FSR diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e968ae220..dad7b07d4 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -3,12 +3,16 @@  set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) -set(GLSL_INCLUDES -    fidelityfx_fsr.comp +set(FIDELITYFX_FILES      ${FIDELITYFX_INCLUDE_DIR}/ffx_a.h      ${FIDELITYFX_INCLUDE_DIR}/ffx_fsr1.h  ) +set(GLSL_INCLUDES +    fidelityfx_fsr.comp +    ${FIDELITYFX_FILES} +) +  set(SHADER_FILES      astc_decoder.comp      blit_color_float.frag @@ -24,6 +28,9 @@ set(SHADER_FILES      fxaa.vert      opengl_convert_s8d24.comp      opengl_copy_bc4.comp +    opengl_fidelityfx_fsr.frag +    opengl_fidelityfx_fsr_easu.frag +    opengl_fidelityfx_fsr_rcas.frag      opengl_present.frag      opengl_present.vert      opengl_present_scaleforce.frag @@ -118,6 +125,25 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES})      endif()  endforeach() +foreach(FILEPATH IN ITEMS ${FIDELITYFX_FILES}) +    get_filename_component(FILENAME ${FILEPATH} NAME) +    string(REPLACE "." 
"_" HEADER_NAME ${FILENAME}) +    set(SOURCE_FILE ${FILEPATH}) +    set(SOURCE_HEADER_FILE ${SHADER_DIR}/${HEADER_NAME}.h) +    add_custom_command( +        OUTPUT +            ${SOURCE_HEADER_FILE} +        COMMAND +            ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} +        MAIN_DEPENDENCY +            ${SOURCE_FILE} +        DEPENDS +            ${INPUT_FILE} +            # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified +    ) +    set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) +endforeach() +  set(SHADER_SOURCES ${SHADER_FILES})  list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag new file mode 100644 index 000000000..16d22f58e --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +//!#version 460 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +#extension GL_AMD_gpu_shader_half_float : enable +#extension GL_NV_gpu_shader5 : enable + +// FidelityFX Super Resolution Sample +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +layout (location = 0) uniform uvec4 constants[4]; + +#define A_GPU 1 +#define A_GLSL 1 + +#ifdef YUZU_USE_FP16 +    #define A_HALF +#endif +#include "ffx_a.h" + +#ifndef YUZU_USE_FP16 +    layout (binding=0) uniform sampler2D InputTexture; +    #if USE_EASU +        #define FSR_EASU_F 1 +        AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } +        AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } +        AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } +    #endif +    #if USE_RCAS +        #define FSR_RCAS_F +        AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } +        void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} +    #endif +#else +    layout (binding=0) uniform sampler2D InputTexture; +    #if USE_EASU +        #define FSR_EASU_H 1 +        AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } +        AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } +        AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } +    #endif +    #if USE_RCAS +        #define FSR_RCAS_H +        AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } +        void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} +    #endif +#endif + +#include "ffx_fsr1.h" + +#if USE_RCAS +    layout(location = 0) in vec2 frag_texcoord; +#endif +layout (location = 0) out vec4 frag_color; + +void CurrFilter(AU2 pos) +{ +#if USE_EASU +    #ifndef YUZU_USE_FP16 +        AF3 c; +        FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]); +        frag_color = AF4(c, 1.0); +    #else +        AH3 c; +        FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]); +        frag_color = AH4(c, 1.0); +    #endif +#endif +#if USE_RCAS +    #ifndef YUZU_USE_FP16 +        AF3 c; +        
FsrRcasF(c.r, c.g, c.b, pos, constants[0]); +        frag_color = AF4(c, 1.0); +    #else +        AH3 c; +        FsrRcasH(c.r, c.g, c.b, pos, constants[0]); +        frag_color = AH4(c, 1.0); +    #endif +#endif +} + +void main() +{ +#if USE_RCAS +    CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0)))); +#else +    CurrFilter(AU2(gl_FragCoord.xy)); +#endif +} diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag new file mode 100644 index 000000000..d39f80ac1 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_easu.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag new file mode 100644 index 000000000..cfa78ddc7 --- /dev/null +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr_rcas.frag @@ -0,0 +1,9 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "opengl_fidelityfx_fsr.frag" diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp new file mode 100644 index 000000000..77262dcf1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fsr.cpp @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_fsr.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + 
+namespace OpenGL {
+using namespace FSR;
+
+// Number of uvec4 elements in the `constants` uniform array declared by the FSR fragment shaders.
+constexpr GLsizei NUM_FSR_CONSTANT_VECTORS = 4;
+
+// CPU-side copy of the shader constants: four uvec4 values stored as 16 consecutive u32 words.
+using FsrConstants = std::array<u32, 4 * NUM_FSR_CONSTANT_VECTORS>;
+
+// Creates the vertex, EASU and RCAS programs from the given sources and sets the vertex
+// program's uniforms at locations 0 and 1.
+// NOTE(review): locations 0/1 are presumably the full-screen triangle's UV scale (1, 1) and
+// offset (0, 0) - confirm against the vertex shader source passed in by the caller.
+FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+         std::string_view fsr_rcas_source)
+    : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)},
+      fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)},
+      fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} {
+    glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f);
+    glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f);
+}
+
+FSR::~FSR() = default;
+
+// Runs the EASU pass into the intermediate texture and prepares the RCAS program so that the
+// caller can present with GetPresentFragmentProgram(). InitBuffers() must have been called.
+//
+// screen                    Output rectangle; its size is used for the intermediate texture,
+//                           the EASU viewport and the EASU output size.
+// input_image_width/height  Size of the source image handed to FsrEasuConOffset.
+// crop_rect                 Region of the source image to scale (size plus left/top offset).
+//
+// On return the draw framebuffer that was bound on entry is bound again, the intermediate
+// texture is bound to texture unit 0 and the RCAS constants are uploaded. The RCAS draw itself
+// is not issued here.
+void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+               u32 input_image_width, u32 input_image_height,
+               const Common::Rectangle<int>& crop_rect) {
+
+    const auto output_image_width = screen.GetWidth();
+    const auto output_image_height = screen.GetHeight();
+
+    // glTextureStorage2D allocates immutable storage, so a texture of the wrong size has to be
+    // released here and recreated by the block below.
+    if (fsr_intermediate_tex.handle) {
+        GLint fsr_tex_width, fsr_tex_height;
+        glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH,
+                                     &fsr_tex_width);
+        glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT,
+                                     &fsr_tex_height);
+        if (static_cast<u32>(fsr_tex_width) != output_image_width ||
+            static_cast<u32>(fsr_tex_height) != output_image_height) {
+            fsr_intermediate_tex.Release();
+        }
+    }
+    if (!fsr_intermediate_tex.handle) {
+        fsr_intermediate_tex.Create(GL_TEXTURE_2D);
+        glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width,
+                           output_image_height);
+        glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0,
+                                  fsr_intermediate_tex.handle, 0);
+    }
+
+    // Remember the caller's draw framebuffer so it can be restored after the EASU pass.
+    GLint old_draw_fb;
+    glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
+    glFrontFace(GL_CW);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle);
+    glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width),
+                       static_cast<GLfloat>(output_image_height));
+
+    // con0..con3 are written into consecutive groups of four words of the same buffer.
+    FsrConstants constants;
+    FsrEasuConOffset(
+        constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12,
+
+        static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
+        static_cast<f32>(input_image_width), static_cast<f32>(input_image_height),
+        static_cast<f32>(output_image_width), static_cast<f32>(output_image_height),
+        static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
+
+    // The count argument is the number of uvec4 array elements to write, not a size in bytes.
+    // Passing sizeof(constants) claimed 64 vectors while the buffer only holds four.
+    glProgramUniform4uiv(fsr_easu_frag.handle, 0, NUM_FSR_CONSTANT_VECTORS, std::data(constants));
+
+    program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle);
+    glDrawArrays(GL_TRIANGLES, 0, 3);
+
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+    glBindTextureUnit(0, fsr_intermediate_tex.handle);
+
+    // The sharpening setting is divided by 100 before being handed to FsrRcasCon.
+    const float sharpening =
+        static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f;
+
+    // FsrRcasCon rewrites only the first four words; the remaining words keep their EASU values
+    // and are uploaded unchanged, as before.
+    FsrRcasCon(constants.data(), sharpening);
+    glProgramUniform4uiv(fsr_rcas_frag.handle, 0, NUM_FSR_CONSTANT_VECTORS, std::data(constants));
+}
+
+// Creates the framebuffer the EASU pass renders into.
+void FSR::InitBuffers() {
+    fsr_framebuffer.Create();
+}
+
+// Releases the framebuffer and the intermediate texture.
+void FSR::ReleaseBuffers() {
+    fsr_framebuffer.Release();
+    fsr_intermediate_tex.Release();
+}
+
+// Returns the RCAS fragment program, whose constants are uploaded by Draw().
+const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept {
+    return fsr_rcas_frag;
+}
+
+// True once InitBuffers() has created the framebuffer and until ReleaseBuffers() is called.
+bool FSR::AreBuffersInitialized() const noexcept {
+    return fsr_framebuffer.handle != 0;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h
new file mode 100644
index 000000000..1f6ae3115
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fsr.h
@@ -0,0 +1,43 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <string_view>
+
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/fsr.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+class ProgramManager;
+
+/// Two-pass FidelityFX Super Resolution scaler for the OpenGL renderer.
+/// Draw() renders the EASU pass into an intermediate texture; the caller then draws with the
+/// program returned by GetPresentFragmentProgram() to apply the RCAS pass.
+class FSR {
+public:
+    /// Creates the vertex, EASU fragment and RCAS fragment programs from GLSL sources.
+    explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source,
+                 std::string_view fsr_rcas_source);
+    ~FSR();
+
+    /// Runs the EASU pass for crop_rect of the input image at the size of screen and uploads
+    /// the RCAS constants. Requires InitBuffers() to have been called.
+    void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen,
+              u32 input_image_width, u32 input_image_height,
+              const Common::Rectangle<int>& crop_rect);
+
+    /// Creates the framebuffer used by the EASU pass.
+    void InitBuffers();
+
+    /// Releases the framebuffer and the intermediate texture.
+    void ReleaseBuffers();
+
+    /// Returns the RCAS fragment program.
+    [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept;
+
+    /// Returns whether the EASU framebuffer currently exists.
+    [[nodiscard]] bool AreBuffersInitialized() const noexcept;
+
+private:
+    OGLFramebuffer fsr_framebuffer;  ///< Render target of the EASU pass
+    OGLProgram fsr_vertex;           ///< Vertex program used for the EASU draw
+    OGLProgram fsr_easu_frag;        ///< EASU fragment program
+    OGLProgram fsr_rcas_frag;        ///< RCAS fragment program
+    OGLTexture fsr_intermediate_tex; ///< EASU output, bound to texture unit 0 after Draw()
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index de95f2634..2a74c1d05 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -17,8 +17,14 @@
 #include "core/frontend/emu_window.h"
 #include "core/memory.h"
 #include "core/telemetry_session.h"
+#include "video_core/host_shaders/ffx_a_h.h"
+#include "video_core/host_shaders/ffx_fsr1_h.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
 #include "video_core/host_shaders/fxaa_frag.h"
 #include "video_core/host_shaders/fxaa_vert.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h"
+#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h"
 #include "video_core/host_shaders/opengl_present_frag.h"
 #include 
"video_core/host_shaders/opengl_present_scaleforce_frag.h"  #include "video_core/host_shaders/opengl_present_vert.h" @@ -31,6 +37,7 @@  #include "video_core/host_shaders/smaa_edge_detection_vert.h"  #include "video_core/host_shaders/smaa_neighborhood_blending_frag.h"  #include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_fsr.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_shader_manager.h"  #include "video_core/renderer_opengl/gl_shader_util.h" @@ -268,12 +275,17 @@ void RendererOpenGL::InitOpenGLObjects() {      fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER);      fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); -    const auto SmaaShader = [](std::string_view specialized_source, GLenum stage) { -        std::string shader_source{specialized_source}; -        constexpr std::string_view include_string = "#include \"opengl_smaa.glsl\""; +    const auto replace_include = [](std::string& shader_source, std::string_view include_name, +                                    std::string_view include_content) { +        const std::string include_string = fmt::format("#include \"{}\"", include_name);          const std::size_t pos = shader_source.find(include_string);          ASSERT(pos != std::string::npos); -        shader_source.replace(pos, include_string.size(), HostShaders::OPENGL_SMAA_GLSL); +        shader_source.replace(pos, include_string.size(), include_content); +    }; + +    const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { +        std::string shader_source{specialized_source}; +        replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL);          return CreateProgram(shader_source, stage);      }; @@ -298,14 +310,32 @@ void RendererOpenGL::InitOpenGLObjects() {          CreateProgram(fmt::format("#version 460\n{}", 
HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),                        GL_FRAGMENT_SHADER); +    std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; +    replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); +    replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + +    std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; +    std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; +    replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); +    replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); + +    fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, +                                fsr_rcas_frag_source); +      // Generate presentation sampler      present_sampler.Create();      glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);      glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); +    glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); +    glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +    glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);      present_sampler_nn.Create();      glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);      glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +    glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); +    glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +    glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);      // Generate VBO handle for drawing      vertex_buffer.Create(); @@ -525,6 +555,31 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {          glBindTextureUnit(0, aa_texture.handle);      } 
+    glDisablei(GL_SCISSOR_TEST, 0); + +    if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { +        if (!fsr->AreBuffersInitialized()) { +            fsr->InitBuffers(); +        } + +        auto crop_rect = framebuffer_crop_rect; +        if (crop_rect.GetWidth() == 0) { +            crop_rect.right = framebuffer_width; +        } +        if (crop_rect.GetHeight() == 0) { +            crop_rect.bottom = framebuffer_height; +        } +        crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); +        const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width); +        const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height); +        glBindSampler(0, present_sampler.handle); +        fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect); +    } else { +        if (fsr->AreBuffersInitialized()) { +            fsr->ReleaseBuffers(); +        } +    } +      const std::array ortho_matrix =          MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); @@ -540,10 +595,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {          case Settings::ScalingFilter::ScaleForce:              return present_scaleforce_fragment.handle;          case Settings::ScalingFilter::Fsr: -            LOG_WARNING( -                Render_OpenGL, -                "FidelityFX Super Resolution is not supported in OpenGL, changing to ScaleForce"); -            return present_scaleforce_fragment.handle; +            return fsr->GetPresentFragmentProgram().handle;          default:              return present_bilinear_fragment.handle;          } @@ -578,15 +630,18 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {      f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);      f32 scale_v =          
static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height); -    // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering -    // (e.g. handheld mode) on a 1920x1080 framebuffer. -    if (framebuffer_crop_rect.GetWidth() > 0) { -        scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / -                  static_cast<f32>(screen_info.texture.width); -    } -    if (framebuffer_crop_rect.GetHeight() > 0) { -        scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / -                  static_cast<f32>(screen_info.texture.height); + +    if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) { +        // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering +        // (e.g. handheld mode) on a 1920x1080 framebuffer. +        if (framebuffer_crop_rect.GetWidth() > 0) { +            scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / +                      static_cast<f32>(screen_info.texture.width); +        } +        if (framebuffer_crop_rect.GetHeight() > 0) { +            scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / +                      static_cast<f32>(screen_info.texture.height); +        }      }      if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa &&          !screen_info.was_accelerated) { @@ -612,7 +667,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {      } else {          glDisable(GL_FRAMEBUFFER_SRGB);      } -    glDisablei(GL_SCISSOR_TEST, 0);      glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),                         static_cast<GLfloat>(layout.height)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc97d7b26..f1d5fd954 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ 
b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,6 +10,7 @@  #include "video_core/renderer_base.h"  #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_fsr.h"  #include "video_core/renderer_opengl/gl_rasterizer.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -141,6 +142,8 @@ private:      OGLTexture smaa_edges_tex;      OGLTexture smaa_blend_tex; +    std::unique_ptr<FSR> fsr; +      /// OpenGL framebuffer data      std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 33daa8c1c..df972cd54 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -1,12 +1,11 @@  // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project  // SPDX-License-Identifier: GPL-2.0-or-later -#include <cmath> -#include "common/bit_cast.h"  #include "common/common_types.h"  #include "common/div_ceil.h"  #include "common/settings.h" +#include "video_core/fsr.h"  #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"  #include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"  #include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" @@ -17,146 +16,7 @@  #include "video_core/vulkan_common/vulkan_device.h"  namespace Vulkan { -namespace { -// Reimplementations of the constant generating functions in ffx_fsr1.h -// GCC generated a lot of warnings when using the official header. 
-u32 AU1_AH1_AF1(f32 f) { -    static constexpr u32 base[512]{ -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, -        0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, -        0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, -        0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, -        0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, -        0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, -        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, -        0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, -        0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, -        0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, -        0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, -    }; -    static constexpr s8 shift[512]{ -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, -        0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, -        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, -        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, -        0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, -        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, -        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 
0x18, -        0x18, 0x18, -    }; -    const u32 u = Common::BitCast<u32>(f); -    const u32 i = u >> 23; -    return base[i] + ((u & 0x7fffff) >> shift[i]); -} - -u32 AU1_AH2_AF2(f32 a[2]) { -    return AU1_AH1_AF1(a[0]) + (AU1_AH1_AF1(a[1]) << 16); -} - -void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX, -                f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, -                f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) { -    con0[0] = Common::BitCast<u32>(inputViewportInPixelsX / outputSizeInPixelsX); -    con0[1] = Common::BitCast<u32>(inputViewportInPixelsY / outputSizeInPixelsY); -    con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f); -    con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f); -    con1[0] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); -    con1[1] = Common::BitCast<u32>(1.0f / inputSizeInPixelsY); -    con1[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); -    con1[3] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsY); -    con2[0] = Common::BitCast<u32>(-1.0f / inputSizeInPixelsX); -    con2[1] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); -    con2[2] = Common::BitCast<u32>(1.0f / inputSizeInPixelsX); -    con2[3] = Common::BitCast<u32>(2.0f / inputSizeInPixelsY); -    con3[0] = Common::BitCast<u32>(0.0f / inputSizeInPixelsX); -    con3[1] = Common::BitCast<u32>(4.0f / inputSizeInPixelsY); -    con3[2] = con3[3] = 0; -} - -void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], -                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY, -                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX, -                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) { -    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, -        
       inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); -    con0[2] = Common::BitCast<u32>(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + -                                   inputOffsetInPixelsX); -    con0[3] = Common::BitCast<u32>(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + -                                   inputOffsetInPixelsY); -} - -void FsrRcasCon(u32* con, f32 sharpness) { -    sharpness = std::exp2f(-sharpness); -    f32 hSharp[2]{sharpness, sharpness}; -    con[0] = Common::BitCast<u32>(sharpness); -    con[1] = AU1_AH2_AF2(hSharp); -    con[2] = 0; -    con[3] = 0; -} -} // Anonymous namespace +using namespace FSR;  FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,           VkExtent2D output_size_) diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index bb9910a53..a45ec69ec 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -460,7 +460,7 @@               </item>               <item>                <property name="text"> -               <string>AMD FidelityFX™️ Super Resolution (Vulkan Only)</string> +               <string>AMD FidelityFX™️ Super Resolution</string>                </property>               </item>              </widget> diff --git a/src/yuzu/configuration/input_profiles.cpp b/src/yuzu/configuration/input_profiles.cpp index 9bb69cab1..41ef4250a 100644 --- a/src/yuzu/configuration/input_profiles.cpp +++ b/src/yuzu/configuration/input_profiles.cpp @@ -58,13 +58,16 @@ std::vector<std::string> InputProfiles::GetInputProfileNames() {      std::vector<std::string> profile_names;      profile_names.reserve(map_profiles.size()); -    for (const auto& [profile_name, config] : map_profiles) { +    auto it = map_profiles.cbegin(); +    while (it != map_profiles.cend()) { +        const auto& [profile_name, config] = *it;          
if (!ProfileExistsInFilesystem(profile_name)) { -            DeleteProfile(profile_name); +            it = map_profiles.erase(it);              continue;          }          profile_names.push_back(profile_name); +        ++it;      }      std::stable_sort(profile_names.begin(), profile_names.end()); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 62aaf41bf..42b7b64c8 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -983,11 +983,6 @@ void GMainWindow::InitializeWidgets() {      filter_status_button->setFocusPolicy(Qt::NoFocus);      connect(filter_status_button, &QPushButton::clicked, this,              &GMainWindow::OnToggleAdaptingFilter); -    auto filter = Settings::values.scaling_filter.GetValue(); -    if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && -        filter == Settings::ScalingFilter::Fsr) { -        Settings::values.scaling_filter.SetValue(Settings::ScalingFilter::NearestNeighbor); -    }      UpdateFilterText();      filter_status_button->setCheckable(true);      filter_status_button->setChecked(true); @@ -3468,10 +3463,6 @@ void GMainWindow::OnToggleAdaptingFilter() {      } else {          filter = static_cast<Settings::ScalingFilter>(static_cast<u32>(filter) + 1);      } -    if (Settings::values.renderer_backend.GetValue() == Settings::RendererBackend::OpenGL && -        filter == Settings::ScalingFilter::Fsr) { -        filter = Settings::ScalingFilter::NearestNeighbor; -    }      Settings::values.scaling_filter.SetValue(filter);      filter_status_button->setChecked(true);      UpdateFilterText(); diff --git a/src/yuzu/multiplayer/direct_connect.cpp b/src/yuzu/multiplayer/direct_connect.cpp index cbd52da85..d71cc23a7 100644 --- a/src/yuzu/multiplayer/direct_connect.cpp +++ b/src/yuzu/multiplayer/direct_connect.cpp @@ -81,20 +81,13 @@ void DirectConnectWindow::Connect() {              }          }      } -    switch (static_cast<ConnectionType>(ui->connection_type->currentIndex())) { 
-    case ConnectionType::TraversalServer: -        break; -    case ConnectionType::IP: -        if (!ui->ip->hasAcceptableInput()) { -            NetworkMessage::ErrorManager::ShowError( -                NetworkMessage::ErrorManager::IP_ADDRESS_NOT_VALID); -            return; -        } -        if (!ui->port->hasAcceptableInput()) { -            NetworkMessage::ErrorManager::ShowError(NetworkMessage::ErrorManager::PORT_NOT_VALID); -            return; -        } -        break; +    if (!ui->ip->hasAcceptableInput()) { +        NetworkMessage::ErrorManager::ShowError(NetworkMessage::ErrorManager::IP_ADDRESS_NOT_VALID); +        return; +    } +    if (!ui->port->hasAcceptableInput()) { +        NetworkMessage::ErrorManager::ShowError(NetworkMessage::ErrorManager::PORT_NOT_VALID); +        return;      }      // Store settings diff --git a/src/yuzu/multiplayer/direct_connect.ui b/src/yuzu/multiplayer/direct_connect.ui index 57d6ec25a..0dd4e6829 100644 --- a/src/yuzu/multiplayer/direct_connect.ui +++ b/src/yuzu/multiplayer/direct_connect.ui @@ -27,19 +27,10 @@            <number>0</number>           </property>           <item> -          <widget class="QComboBox" name="connection_type"> -           <item> -            <property name="text"> -             <string>IP Address</string> -            </property> -           </item> -          </widget> -         </item> -         <item>            <widget class="QWidget" name="ip_container" native="true">             <layout class="QHBoxLayout" name="ip_layout">              <property name="leftMargin"> -             <number>5</number> +             <number>0</number>              </property>              <property name="topMargin">               <number>0</number> @@ -53,17 +44,17 @@              <item>               <widget class="QLabel" name="label_2">                <property name="text"> -               <string>IP</string> +               <string>Server Address</string>                </property>               
</widget>              </item>              <item>               <widget class="QLineEdit" name="ip">                <property name="toolTip"> -               <string><html><head/><body><p>IPv4 address of the host</p></body></html></string> +               <string><html><head/><body><p>Server address of the host</p></body></html></string>                </property>                <property name="maxLength"> -               <number>16</number> +               <number>253</number>                </property>               </widget>              </item> @@ -85,6 +76,12 @@                <property name="placeholderText">                 <string notr="true" extracomment="placeholder string that tells user default port">24872</string>                </property> +              <property name="maximumSize"> +               <size> +                <width>65</width> +                <height>50</height> +               </size> +              </property>               </widget>              </item>             </layout> diff --git a/src/yuzu/multiplayer/validation.h b/src/yuzu/multiplayer/validation.h index dd25af280..cbbe6757b 100644 --- a/src/yuzu/multiplayer/validation.h +++ b/src/yuzu/multiplayer/validation.h @@ -38,11 +38,28 @@ private:          QRegularExpression(QStringLiteral("^[a-zA-Z0-9._ -]{4,20}"));      QRegularExpressionValidator nickname; -    /// ipv4 address only -    // TODO remove this when we support hostnames in direct connect +    /// ipv4 / ipv6 / hostnames      QRegularExpression ip_regex = QRegularExpression(QStringLiteral( -        "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|" -        "2[0-4][0-9]|25[0-5])")); +        // IPv4 regex +        "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$|" +        // IPv6 regex +        "^((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|" +        "(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-" 
+        "5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|" +        "(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)" +        "(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|" +        "(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]" +        "\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|" +        "(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[" +        "0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|" +        "(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[" +        "0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|" +        "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[" +        "0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|" +        "(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?" +        "\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?$|" +        // Hostname regex +        "^([a-zA-Z0-9]+(-[a-zA-Z0-9]+)*\\.)+[a-zA-Z]{2,}$"));      QRegularExpressionValidator ip;      /// port must be between 0 and 65535 | 
