diff options
41 files changed, 1944 insertions, 128 deletions
| diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 7f0a6d069..d807ef65f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -92,10 +92,14 @@ endif()  add_subdirectory(sirit)  if (ENABLE_WEB_SERVICE) -    find_package(OpenSSL 1.1) -    if (OPENSSL_FOUND) -        set(OPENSSL_LIBRARIES OpenSSL::SSL OpenSSL::Crypto) -    else() +    if (NOT WIN32) +        find_package(OpenSSL 1.1) +        if (OPENSSL_FOUND) +            set(OPENSSL_LIBRARIES OpenSSL::SSL OpenSSL::Crypto) +        endif() +    endif() + +    if (WIN32 OR NOT OPENSSL_FOUND)          # LibreSSL          set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")          set(OPENSSLDIR "/etc/ssl/") diff --git a/src/audio_core/renderer/command/command_buffer.cpp b/src/audio_core/renderer/command/command_buffer.cpp index 2ef879ee1..8c6fe97e7 100644 --- a/src/audio_core/renderer/command/command_buffer.cpp +++ b/src/audio_core/renderer/command/command_buffer.cpp @@ -460,21 +460,23 @@ void CommandBuffer::GenerateDeviceSinkCommand(const s32 node_id, const s16 buffe      cmd.session_id = session_id; +    cmd.input_count = parameter.input_count; +    s16 max_input{0}; +    for (u32 i = 0; i < parameter.input_count; i++) { +        cmd.inputs[i] = buffer_offset + parameter.inputs[i]; +        max_input = std::max(max_input, cmd.inputs[i]); +    } +      if (state.upsampler_info != nullptr) {          const auto size_{state.upsampler_info->sample_count * parameter.input_count};          const auto size_bytes{size_ * sizeof(s32)};          const auto addr{memory_pool->Translate(state.upsampler_info->samples_pos, size_bytes)};          cmd.sample_buffer = {reinterpret_cast<s32*>(addr), -                             parameter.input_count * state.upsampler_info->sample_count}; +                             (max_input + 1) * state.upsampler_info->sample_count};      } else {          cmd.sample_buffer = samples_buffer;      } -    cmd.input_count = parameter.input_count; -    for (u32 i = 0; i < parameter.input_count; i++) { -        cmd.inputs[i] = buffer_offset + parameter.inputs[i]; -    } -      GenerateEnd<DeviceSinkCommand>(cmd);  } diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index d87be72d6..e252b5f4b 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -199,7 +199,7 @@ public:      ~HLERequestContext();      /// Returns a pointer to the IPC command buffer for this request. -    u32* CommandBuffer() { +    [[nodiscard]] u32* CommandBuffer() {          return cmd_buf.data();      } @@ -207,7 +207,7 @@ public:       * Returns the session through which this request was made. This can be used as a map key to       * access per-client data on services.       */ -    Kernel::KServerSession* Session() { +    [[nodiscard]] Kernel::KServerSession* Session() {          return server_session;      } @@ -217,61 +217,61 @@ public:      /// Writes data from this context back to the requesting process/thread.      Result WriteToOutgoingCommandBuffer(KThread& requesting_thread); -    u32_le GetHipcCommand() const { +    [[nodiscard]] u32_le GetHipcCommand() const {          return command;      } -    u32_le GetTipcCommand() const { +    [[nodiscard]] u32_le GetTipcCommand() const {          return static_cast<u32_le>(command_header->type.Value()) -                 static_cast<u32_le>(IPC::CommandType::TIPC_CommandRegion);      } -    u32_le GetCommand() const { +    [[nodiscard]] u32_le GetCommand() const {          return command_header->IsTipc() ? GetTipcCommand() : GetHipcCommand();      } -    bool IsTipc() const { +    [[nodiscard]] bool IsTipc() const {          return command_header->IsTipc();      } -    IPC::CommandType GetCommandType() const { +    [[nodiscard]] IPC::CommandType GetCommandType() const {          return command_header->type;      } -    u64 GetPID() const { +    [[nodiscard]] u64 GetPID() const {          return pid;      } -    u32 GetDataPayloadOffset() const { +    [[nodiscard]] u32 GetDataPayloadOffset() const {          return data_payload_offset;      } -    const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const { +    [[nodiscard]] const std::vector<IPC::BufferDescriptorX>& BufferDescriptorX() const {          return buffer_x_desciptors;      } -    const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const { +    [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorA() const {          return buffer_a_desciptors;      } -    const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const { +    [[nodiscard]] const std::vector<IPC::BufferDescriptorABW>& BufferDescriptorB() const {          return buffer_b_desciptors;      } -    const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const { +    [[nodiscard]] const std::vector<IPC::BufferDescriptorC>& BufferDescriptorC() const {          return buffer_c_desciptors;      } -    const IPC::DomainMessageHeader& GetDomainMessageHeader() const { +    [[nodiscard]] const IPC::DomainMessageHeader& GetDomainMessageHeader() const {          return domain_message_header.value();      } -    bool HasDomainMessageHeader() const { +    [[nodiscard]] bool HasDomainMessageHeader() const {          return domain_message_header.has_value();      }      /// Helper function to read a buffer using the appropriate buffer descriptor -    std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const; +    [[nodiscard]] std::vector<u8> ReadBuffer(std::size_t buffer_index = 0) const;      /// Helper function to write a buffer using the appropriate buffer descriptor      std::size_t WriteBuffer(const void* buffer, std::size_t size, @@ -308,22 +308,34 @@ public:      }      /// Helper function to get the size of the input buffer -    std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const; +    [[nodiscard]] std::size_t GetReadBufferSize(std::size_t buffer_index = 0) const;      /// Helper function to get the size of the output buffer -    std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; +    [[nodiscard]] std::size_t GetWriteBufferSize(std::size_t buffer_index = 0) const; + +    /// Helper function to derive the number of elements able to be contained in the read buffer +    template <typename T> +    [[nodiscard]] std::size_t GetReadBufferNumElements(std::size_t buffer_index = 0) const { +        return GetReadBufferSize(buffer_index) / sizeof(T); +    } + +    /// Helper function to derive the number of elements able to be contained in the write buffer +    template <typename T> +    [[nodiscard]] std::size_t GetWriteBufferNumElements(std::size_t buffer_index = 0) const { +        return GetWriteBufferSize(buffer_index) / sizeof(T); +    }      /// Helper function to test whether the input buffer at buffer_index can be read -    bool CanReadBuffer(std::size_t buffer_index = 0) const; +    [[nodiscard]] bool CanReadBuffer(std::size_t buffer_index = 0) const;      /// Helper function to test whether the output buffer at buffer_index can be written -    bool CanWriteBuffer(std::size_t buffer_index = 0) const; +    [[nodiscard]] bool CanWriteBuffer(std::size_t buffer_index = 0) const; -    Handle GetCopyHandle(std::size_t index) const { +    [[nodiscard]] Handle GetCopyHandle(std::size_t index) const {          return incoming_copy_handles.at(index);      } -    Handle GetMoveHandle(std::size_t index) const { +    [[nodiscard]] Handle GetMoveHandle(std::size_t index) const {          return incoming_move_handles.at(index);      } @@ -348,13 +360,13 @@ public:          manager = manager_;      } -    std::string Description() const; +    [[nodiscard]] std::string Description() const; -    KThread& GetThread() { +    [[nodiscard]] KThread& GetThread() {          return *thread;      } -    std::shared_ptr<SessionRequestManager> GetManager() const { +    [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const {          return manager.lock();      } diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index 608925dfc..053e8f9dd 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp @@ -122,10 +122,10 @@ private:      }      void GetReleasedAudioInBuffer(Kernel::HLERequestContext& ctx) { -        auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); -        std::vector<u64> released_buffers(write_buffer_size, 0); +        const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); +        std::vector<u64> released_buffers(write_buffer_size); -        auto count = impl->GetReleasedBuffers(released_buffers); +        const auto count = impl->GetReleasedBuffers(released_buffers);          [[maybe_unused]] std::string tags{};          for (u32 i = 0; i < count; i++) { @@ -228,7 +228,7 @@ void AudInU::ListAudioIns(Kernel::HLERequestContext& ctx) {      LOG_DEBUG(Service_Audio, "called");      const auto write_count = -        static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); +        static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());      std::vector<AudioDevice::AudioDeviceName> device_names{};      u32 out_count{0}; @@ -248,7 +248,7 @@ void AudInU::ListAudioInsAutoFiltered(Kernel::HLERequestContext& ctx) {      LOG_DEBUG(Service_Audio, "called");      const auto write_count = -        static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); +        static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());      std::vector<AudioDevice::AudioDeviceName> device_names{};      u32 out_count{0}; diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 122290c6a..29751f075 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp @@ -129,16 +129,16 @@ private:      }      void GetReleasedAudioOutBuffers(Kernel::HLERequestContext& ctx) { -        auto write_buffer_size = ctx.GetWriteBufferSize() / sizeof(u64); -        std::vector<u64> released_buffers(write_buffer_size, 0); +        const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); +        std::vector<u64> released_buffers(write_buffer_size); -        auto count = impl->GetReleasedBuffers(released_buffers); +        const auto count = impl->GetReleasedBuffers(released_buffers);          [[maybe_unused]] std::string tags{};          for (u32 i = 0; i < count; i++) {              tags += fmt::format("{:08X}, ", released_buffers[i]);          } -        [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()}; +        [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};          LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,                    tags); @@ -244,7 +244,7 @@ void AudOutU::ListAudioOuts(Kernel::HLERequestContext& ctx) {      std::scoped_lock l{impl->mutex};      const auto write_count = -        static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName)); +        static_cast<u32>(ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>());      std::vector<AudioDevice::AudioDeviceName> device_names{};      if (write_count > 0) {          device_names.emplace_back("DeviceOut"); diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 13423dca6..034ee273f 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -274,7 +274,7 @@ public:  private:      void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { -        const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); +        const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>();          std::vector<AudioDevice::AudioDeviceName> out_names{}; @@ -335,7 +335,7 @@ private:      }      void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { -        const auto write_size = ctx.GetWriteBufferSize() / sizeof(char); +        const auto write_size = ctx.GetWriteBufferSize();          std::string out_name{"AudioTvOutput"};          LOG_DEBUG(Service_Audio, "(STUBBED) called. Name={}", out_name); @@ -387,7 +387,7 @@ private:      }      void ListAudioOutputDeviceName(Kernel::HLERequestContext& ctx) { -        const size_t in_count = ctx.GetWriteBufferSize() / sizeof(AudioDevice::AudioDeviceName); +        const size_t in_count = ctx.GetWriteBufferNumElements<AudioDevice::AudioDeviceName>();          std::vector<AudioDevice::AudioDeviceName> out_names{}; diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 8bafc3a98..825fb8bcc 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -68,7 +68,7 @@ private:                                   ExtraBehavior extra_behavior) {          u32 consumed = 0;          u32 sample_count = 0; -        std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); +        std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>());          if (extra_behavior == ExtraBehavior::ResetContext) {              ResetDecoderContext(); diff --git a/src/core/hle/service/bcat/bcat_module.cpp b/src/core/hle/service/bcat/bcat_module.cpp index bc08ac487..cbe690a5d 100644 --- a/src/core/hle/service/bcat/bcat_module.cpp +++ b/src/core/hle/service/bcat/bcat_module.cpp @@ -443,7 +443,7 @@ private:      }      void Read(Kernel::HLERequestContext& ctx) { -        auto write_size = ctx.GetWriteBufferSize() / sizeof(DeliveryCacheDirectoryEntry); +        auto write_size = ctx.GetWriteBufferNumElements<DeliveryCacheDirectoryEntry>();          LOG_DEBUG(Service_BCAT, "called, write_size={:016X}", write_size); @@ -533,7 +533,7 @@ private:      }      void EnumerateDeliveryCacheDirectory(Kernel::HLERequestContext& ctx) { -        auto size = ctx.GetWriteBufferSize() / sizeof(DirectoryName); +        auto size = ctx.GetWriteBufferNumElements<DirectoryName>();          LOG_DEBUG(Service_BCAT, "called, size={:016X}", size); diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index ff9b0427c..d183e5829 100644 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -192,12 +192,10 @@ private:      }      void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) { -        u32 out_entries; -        if (keys.GetCommonTickets().empty()) -            out_entries = 0; -        else -            out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128)); - +        size_t out_entries = 0; +        if (!keys.GetCommonTickets().empty()) { +            out_entries = ctx.GetWriteBufferNumElements<u128>(); +        }          LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries);          keys.PopulateTickets(); @@ -206,20 +204,19 @@ private:          std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),                         [](const auto& pair) { return pair.first; }); -        out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); +        out_entries = std::min(ids.size(), out_entries);          ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));          IPC::ResponseBuilder rb{ctx, 3};          rb.Push(ResultSuccess); -        rb.Push<u32>(out_entries); +        rb.Push<u32>(static_cast<u32>(out_entries));      }      void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) { -        u32 out_entries; -        if (keys.GetPersonalizedTickets().empty()) -            out_entries = 0; -        else -            out_entries = static_cast<u32>(ctx.GetWriteBufferSize() / sizeof(u128)); +        size_t out_entries = 0; +        if (!keys.GetPersonalizedTickets().empty()) { +            out_entries = ctx.GetWriteBufferNumElements<u128>(); +        }          LOG_DEBUG(Service_ETicket, "called, entries={:016X}", out_entries); @@ -229,12 +226,12 @@ private:          std::transform(tickets.begin(), tickets.end(), std::back_inserter(ids),                         [](const auto& pair) { return pair.first; }); -        out_entries = static_cast<u32>(std::min<std::size_t>(ids.size(), out_entries)); +        out_entries = std::min(ids.size(), out_entries);          ctx.WriteBuffer(ids.data(), out_entries * sizeof(u128));          IPC::ResponseBuilder rb{ctx, 3};          rb.Push(ResultSuccess); -        rb.Push<u32>(out_entries); +        rb.Push<u32>(static_cast<u32>(out_entries));      }      void GetCommonTicketSize(Kernel::HLERequestContext& ctx) { diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index c08274ef9..fbb16a7da 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -277,7 +277,7 @@ private:          LOG_DEBUG(Service_FS, "called.");          // Calculate how many entries we can fit in the output buffer -        const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(FileSys::Entry); +        const u64 count_entries = ctx.GetWriteBufferNumElements<FileSys::Entry>();          // Cap at total number of entries.          const u64 actual_entries = std::min(count_entries, entries.size() - next_entry_index); @@ -543,7 +543,7 @@ public:          LOG_DEBUG(Service_FS, "called");          // Calculate how many entries we can fit in the output buffer -        const u64 count_entries = ctx.GetWriteBufferSize() / sizeof(SaveDataInfo); +        const u64 count_entries = ctx.GetWriteBufferNumElements<SaveDataInfo>();          // Cap at total number of entries.          const u64 actual_entries = std::min(count_entries, info.size() - next_entry_index); diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp index 6df563136..c49c61cff 100644 --- a/src/core/hle/service/ldn/ldn.cpp +++ b/src/core/hle/service/ldn/ldn.cpp @@ -292,7 +292,7 @@ public:      void GetNetworkInfoLatestUpdate(Kernel::HLERequestContext& ctx) {          const std::size_t network_buffer_size = ctx.GetWriteBufferSize(0); -        const std::size_t node_buffer_count = ctx.GetWriteBufferSize(1) / sizeof(NodeLatestUpdate); +        const std::size_t node_buffer_count = ctx.GetWriteBufferNumElements<NodeLatestUpdate>(1);          if (node_buffer_count == 0 || network_buffer_size != sizeof(NetworkInfo)) {              LOG_ERROR(Service_LDN, "Invalid buffer, size = {}, count = {}", network_buffer_size, @@ -333,7 +333,7 @@ public:          const auto channel{rp.PopEnum<WifiChannel>()};          const auto scan_filter{rp.PopRaw<ScanFilter>()}; -        const std::size_t network_info_size = ctx.GetWriteBufferSize() / sizeof(NetworkInfo); +        const std::size_t network_info_size = ctx.GetWriteBufferNumElements<NetworkInfo>();          if (network_info_size == 0) {              LOG_ERROR(Service_LDN, "Invalid buffer size {}", network_info_size); diff --git a/src/core/hle/service/nfc/nfc_user.cpp b/src/core/hle/service/nfc/nfc_user.cpp index 0753333bf..ced2d560b 100644 --- a/src/core/hle/service/nfc/nfc_user.cpp +++ b/src/core/hle/service/nfc/nfc_user.cpp @@ -118,7 +118,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {      }      std::vector<u64> nfp_devices; -    const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); +    const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>();      for (auto& device : devices) {          if (nfp_devices.size() >= max_allowed_devices) { diff --git a/src/core/hle/service/nfp/nfp_user.cpp b/src/core/hle/service/nfp/nfp_user.cpp index 2fe3c0ea0..49816b4c7 100644 --- a/src/core/hle/service/nfp/nfp_user.cpp +++ b/src/core/hle/service/nfp/nfp_user.cpp @@ -104,9 +104,9 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {      }      std::vector<u64> nfp_devices; -    const std::size_t max_allowed_devices = ctx.GetWriteBufferSize() / sizeof(u64); +    const std::size_t max_allowed_devices = ctx.GetWriteBufferNumElements<u64>(); -    for (auto& device : devices) { +    for (const auto& device : devices) {          if (nfp_devices.size() >= max_allowed_devices) {              continue;          } @@ -115,7 +115,7 @@ void IUser::ListDevices(Kernel::HLERequestContext& ctx) {          }      } -    if (nfp_devices.size() == 0) { +    if (nfp_devices.empty()) {          IPC::ResponseBuilder rb{ctx, 2};          rb.Push(DeviceNotFound);          return; diff --git a/src/core/hle/service/ns/iplatform_service_manager.cpp b/src/core/hle/service/ns/iplatform_service_manager.cpp index fd047ff26..1fab2f0dd 100644 --- a/src/core/hle/service/ns/iplatform_service_manager.cpp +++ b/src/core/hle/service/ns/iplatform_service_manager.cpp @@ -279,13 +279,10 @@ void IPlatformServiceManager::GetSharedFontInOrderOfPriority(Kernel::HLERequestC          font_sizes.push_back(region.size);      } -    // Resize buffers if game requests smaller size output. -    font_codes.resize( -        std::min<std::size_t>(font_codes.size(), ctx.GetWriteBufferSize(0) / sizeof(u32))); -    font_offsets.resize( -        std::min<std::size_t>(font_offsets.size(), ctx.GetWriteBufferSize(1) / sizeof(u32))); -    font_sizes.resize( -        std::min<std::size_t>(font_sizes.size(), ctx.GetWriteBufferSize(2) / sizeof(u32))); +    // Resize buffers if game requests smaller size output +    font_codes.resize(std::min(font_codes.size(), ctx.GetWriteBufferNumElements<u32>(0))); +    font_offsets.resize(std::min(font_offsets.size(), ctx.GetWriteBufferNumElements<u32>(1))); +    font_sizes.resize(std::min(font_sizes.size(), ctx.GetWriteBufferNumElements<u32>(2)));      ctx.WriteBuffer(font_codes, 0);      ctx.WriteBuffer(font_offsets, 1); diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index f761c2da4..4f1a8d6b7 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -83,7 +83,7 @@ void PushResponseLanguageCode(Kernel::HLERequestContext& ctx, std::size_t num_la  }  void GetAvailableLanguageCodesImpl(Kernel::HLERequestContext& ctx, std::size_t max_entries) { -    const std::size_t requested_amount = ctx.GetWriteBufferSize() / sizeof(LanguageCode); +    const std::size_t requested_amount = ctx.GetWriteBufferNumElements<LanguageCode>();      const std::size_t max_amount = std::min(requested_amount, max_entries);      const std::size_t copy_amount = std::min(available_language_codes.size(), max_amount);      const std::size_t copy_size = copy_amount * sizeof(LanguageCode); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d7f7d336c..b03a30992 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -28,6 +28,10 @@ add_library(video_core STATIC      dirty_flags.h      dma_pusher.cpp      dma_pusher.h +    engines/sw_blitter/blitter.cpp +    engines/sw_blitter/blitter.h +    engines/sw_blitter/converter.cpp +    engines/sw_blitter/converter.h      engines/const_buffer_info.h      engines/engine_interface.h      engines/engine_upload.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 599551013..5d3a8293b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1742,12 +1742,12 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,      SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));      if constexpr (USE_MEMORY_MAPS) { +        auto upload_staging = runtime.UploadStagingBuffer(copy_size);          std::array copies{BufferCopy{ -            .src_offset = 0, +            .src_offset = upload_staging.offset,              .dst_offset = buffer.Offset(dest_address),              .size = copy_size,          }}; -        auto upload_staging = runtime.UploadStagingBuffer(copy_size);          u8* const src_pointer = upload_staging.mapped_span.data();          std::memcpy(src_pointer, inlined_buffer.data(), copy_size);          runtime.CopyBuffer(buffer, upload_staging.buffer, copies); diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index cdecc3a91..832025d75 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -20,7 +20,7 @@ void ChannelState::Init(Core::System& system, GPU& gpu) {      ASSERT(memory_manager);      dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);      maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager); -    fermi_2d = std::make_unique<Engines::Fermi2D>(); +    fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);      kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);      maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);      kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index a34819234..28aa85f32 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -51,11 +51,11 @@ void State::ProcessData(std::span<const u8> read_buffer) {          } else {              for (u32 line = 0; line < regs.line_count; ++line) {                  const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; -                memory_manager.WriteBlockUnsafe( -                    dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in, -                    regs.line_length_in); +                std::span<const u8> buffer(read_buffer.data() + +                                               static_cast<size_t>(line) * regs.line_length_in, +                                           regs.line_length_in); +                rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);              } -            memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);          }      } else {          u32 width = regs.dest.width; diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 453e0fb01..c6478ae85 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -3,17 +3,25 @@  #include "common/assert.h"  #include "common/logging/log.h" +#include "common/microprofile.h"  #include "video_core/engines/fermi_2d.h" -#include "video_core/memory_manager.h" +#include "video_core/engines/sw_blitter/blitter.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +MICROPROFILE_DECLARE(GPU_BlitEngine); +MICROPROFILE_DEFINE(GPU_BlitEngine, "GPU", "Blit Engine", MP_RGB(224, 224, 128));  using VideoCore::Surface::BytesPerBlock;  using VideoCore::Surface::PixelFormatFromRenderTargetFormat;  namespace Tegra::Engines { -Fermi2D::Fermi2D() { +using namespace Texture; + +Fermi2D::Fermi2D(MemoryManager& memory_manager_) { +    sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_);      // Nvidia's OpenGL driver seems to assume these values      regs.src.depth = 1;      regs.dst.depth = 1; @@ -42,6 +50,7 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32  }  void Fermi2D::Blit() { +    MICROPROFILE_SCOPE(GPU_BlitEngine);      LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",                regs.src.Address(), regs.dst.Address()); @@ -52,9 +61,16 @@ void Fermi2D::Blit() {      UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");      const auto& args = regs.pixels_from_memory; +    constexpr s64 null_derivate = 1ULL << 32; +    Surface src = regs.src; +    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); +    const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && +                                 src.format != regs.dst.format;      Config config{          .operation = regs.operation,          .filter = args.sample_mode.filter, +        .must_accelerate = +            args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu,          .dst_x0 = args.dst_x0,          .dst_y0 = args.dst_y0,          .dst_x1 = args.dst_x0 + args.dst_width, @@ -64,8 +80,7 @@ void Fermi2D::Blit() {          .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),          .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),      }; -    Surface src = regs.src; -    const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); +      const auto need_align_to_pitch =          src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch &&          static_cast<s32>(src.width) == config.src_x1 && @@ -78,8 +93,9 @@ void Fermi2D::Blit() {          config.src_x1 -= config.src_x0;          config.src_x0 = 0;      } +      if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { -        UNIMPLEMENTED(); +        sw_blitter->Blit(src, regs.dst, config);      }  } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 1229aa35b..24b518cb5 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -5,6 +5,7 @@  #include <array>  #include <cstddef> +#include <memory>  #include "common/bit_field.h"  #include "common/common_funcs.h"  #include "common/common_types.h" @@ -21,6 +22,10 @@ class RasterizerInterface;  namespace Tegra::Engines { +namespace Blitter { +class SoftwareBlitEngine; +} +  /**   * This Engine is known as G80_2D. Documentation can be found in:   * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml @@ -32,7 +37,7 @@ namespace Tegra::Engines {  class Fermi2D final : public EngineInterface {  public: -    explicit Fermi2D(); +    explicit Fermi2D(MemoryManager& memory_manager_);      ~Fermi2D() override;      /// Binds a rasterizer to this engine. @@ -286,6 +291,7 @@ public:      struct Config {          Operation operation;          Filter filter; +        bool must_accelerate;          s32 dst_x0;          s32 dst_y0;          s32 dst_x1; @@ -298,6 +304,7 @@ public:  private:      VideoCore::RasterizerInterface* rasterizer = nullptr; +    std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;      /// Performs the copy from the source surface to the destination surface as configured in the      /// registers. diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5bb1427c1..6d43e23ea 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -249,9 +249,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume          return;      case MAXWELL3D_REG_INDEX(fragment_barrier):          return rasterizer->FragmentBarrier(); -    case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): -        rasterizer->InvalidateGPUCache(); -        return rasterizer->WaitForIdle();      case MAXWELL3D_REG_INDEX(tiled_cache_barrier):          return rasterizer->TiledCacheBarrier();      } @@ -511,10 +508,7 @@ void Maxwell3D::ProcessCounterReset() {  void Maxwell3D::ProcessSyncPoint() {      const u32 sync_point = regs.sync_info.sync_point.Value(); -    const u32 cache_flush = regs.sync_info.clean_l2.Value(); -    if (cache_flush != 0) { -        rasterizer->InvalidateGPUCache(); -    } +    [[maybe_unused]] const u32 cache_flush = regs.sync_info.clean_l2.Value();      rasterizer->SignalSyncPoint(sync_point);  } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1bf6ca2dd..334429514 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -62,7 +62,8 @@ void MaxwellDMA::Launch() {          if (!is_src_pitch && !is_dst_pitch) {              // If both the source and the destination are in block layout, assert. -            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); +            CopyBlockLinearToBlockLinear(); +            ReleaseSemaphore();              return;          } @@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {      memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);  } +void MaxwellDMA::CopyBlockLinearToBlockLinear() { +    UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); + +    const bool is_remapping = regs.launch_dma.remap_enable != 0; + +    // Deswizzle the input and copy it over. +    const Parameters& src = regs.src_params; +    const Parameters& dst = regs.dst_params; + +    const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; +    const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; + +    const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; + +    u32 src_width = src.width; +    u32 dst_width = dst.width; +    u32 x_elements = regs.line_length_in; +    u32 src_x_offset = src.origin.x; +    u32 dst_x_offset = dst.origin.x; +    u32 bpp_shift = 0U; +    if (!is_remapping) { +        bpp_shift = Common::FoldRight( +            4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, +            src_width, dst_width, x_elements, src_x_offset, dst_x_offset, +            static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out)); +        src_width >>= bpp_shift; +        dst_width >>= bpp_shift; +        x_elements >>= bpp_shift; +        src_x_offset >>= bpp_shift; +        dst_x_offset >>= bpp_shift; +    } + +    const u32 bytes_per_pixel = base_bpp << bpp_shift; +    const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth, +                                          src.block_size.height, src.block_size.depth); +    const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth, +                                          dst.block_size.height, dst.block_size.depth); + +    const u32 pitch = x_elements * bytes_per_pixel; +    const size_t mid_buffer_size = pitch * regs.line_count; + +    if (read_buffer.size() < src_size) { +        read_buffer.resize(src_size); +    } +    if (write_buffer.size() < dst_size) { +        write_buffer.resize(dst_size); +    } + +    intermediate_buffer.resize(mid_buffer_size); + +    memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); +    memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + +    UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, +                     src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, +                     src.block_size.height, src.block_size.depth, pitch); + +    SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, +                   dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, +                   dst.block_size.height, dst.block_size.depth, pitch); + +    memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); +} +  void MaxwellDMA::ReleaseSemaphore() {      const auto type = regs.launch_dma.semaphore_type;      const GPUVAddr address = regs.semaphore.address; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 953e34adc..d40d3d302 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -223,6 +223,8 @@ private:      void CopyPitchToBlockLinear(); +    void CopyBlockLinearToBlockLinear(); +      void FastCopyBlockLinearToPitch();      void ReleaseSemaphore(); @@ -234,6 +236,7 @@ private:      std::vector<u8> read_buffer;      std::vector<u8> write_buffer; +    std::vector<u8> intermediate_buffer;      static constexpr std::size_t NUM_REGS = 0x800;      struct Regs { diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 4d2278811..c308ba3fc 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -118,7 +118,7 @@ void Puller::ProcessSemaphoreRelease() {      std::function<void()> operation([this, sequence_address, payload] {          memory_manager.Write<u32>(sequence_address, payload);      }); -    rasterizer->SyncOperation(std::move(operation)); +    rasterizer->SignalFence(std::move(operation));  }  void Puller::ProcessSemaphoreAcquire() { @@ -151,8 +151,8 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {      case BufferMethods::SemaphoreAddressLow:      case BufferMethods::SemaphoreSequencePayload:      case BufferMethods::SyncpointPayload: -        break;      case BufferMethods::WrcacheFlush: +        break;      case BufferMethods::RefCnt:          rasterizer->SignalReference();          break; diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp new file mode 100644 index 000000000..2f1ea4626 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -0,0 +1,238 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <algorithm> +#include <cmath> +#include <vector> + +#include "video_core/engines/sw_blitter/blitter.h" +#include "video_core/engines/sw_blitter/converter.h" +#include "video_core/memory_manager.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +namespace Tegra { +class MemoryManager; +} + +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +namespace Tegra::Engines::Blitter { + +using namespace Texture; + +namespace { + +constexpr size_t ir_components = 4; + +void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height, +                     u32 dst_width, u32 dst_height, size_t bpp) { +    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); +    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); +    size_t src_y = 0; +    for (u32 y = 0; y < dst_height; y++) { +        size_t src_x = 0; +        for (u32 x = 0; x < dst_width; x++) { +            const size_t read_from = ((src_y * src_width + src_x) >> 32) * bpp; +            const size_t write_to = (y * dst_width + x) * bpp; + +            std::memcpy(&output[write_to], &input[read_from], bpp); +            src_x += dx_du; +        } +        src_y += dy_dv; +    } +} + +void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width, +                         u32 src_height, u32 dst_width, u32 dst_height) { +    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32)); +    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32)); +    size_t src_y = 0; +    for (u32 y = 0; y < dst_height; y++) { +        size_t src_x = 0; +        for (u32 x = 0; x < dst_width; x++) { +            const size_t read_from = ((src_y * src_width + src_x) >> 32) * ir_components; +            const size_t write_to = (y * dst_width + x) * ir_components; + +            std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components); +            src_x += dx_du; +        } +        src_y += dy_dv; +    } +} + +void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width, +              size_t src_height, size_t dst_width, size_t dst_height) { +    const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0, +                                    std::span<const f32> x0_y1, std::span<const f32> x1_y1, +                                    f32 weight_x, f32 weight_y) { +        std::array<f32, ir_components> result{}; +        for (size_t i = 0; i < ir_components; i++) { +            const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x); +            const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x); +            result[i] = std::lerp(a, b, weight_y); +        } +        return result; +    }; +    const f32 dx_du = +        dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f; +    const f32 dy_dv = +        dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f; +    for (u32 y = 0; y < dst_height; y++) { +        for (u32 x = 0; x < dst_width; x++) { +            const f32 x_low = std::floor(static_cast<f32>(x) * dx_du); +            const f32 y_low = std::floor(static_cast<f32>(y) * dy_dv); +            const f32 x_high = std::ceil(static_cast<f32>(x) * dx_du); +            const f32 y_high = std::ceil(static_cast<f32>(y) * dy_dv); +            const f32 weight_x = (static_cast<f32>(x) * dx_du) - x_low; +            const f32 weight_y = (static_cast<f32>(y) * dy_dv) - y_low; + +            const auto read_src = [&](f32 in_x, f32 in_y) { +                const size_t read_from = +                    ((static_cast<size_t>(in_x) * src_width + static_cast<size_t>(in_y)) >> 32) * +                    ir_components; +                return std::span<const f32>(&input[read_from], ir_components); +            }; + +            auto x0_y0 = read_src(x_low, y_low); +            auto x1_y0 = read_src(x_high, y_low); +            auto x0_y1 = read_src(x_low, y_high); +            auto x1_y1 = read_src(x_high, y_high); + +            const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y); + +            const size_t write_to = (y * dst_width + x) * ir_components; + +            std::memcpy(&output[write_to], &result, sizeof(f32) * ir_components); +        } +    } +} + +} // namespace + +struct SoftwareBlitEngine::BlitEngineImpl { +    std::vector<u8> tmp_buffer; +    std::vector<u8> src_buffer; +    std::vector<u8> dst_buffer; +    std::vector<f32> intermediate_src; +    std::vector<f32> intermediate_dst; +    ConverterFactory converter_factory; +}; + +SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_) +    : memory_manager{memory_manager_} { +    impl = std::make_unique<BlitEngineImpl>(); +} + +SoftwareBlitEngine::~SoftwareBlitEngine() = default; + +bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, +                              Fermi2D::Config& config) { +    const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) { +        if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) { +            return CalculateSize(true, bytes_per_pixel, surface.width, surface.height, +                                 surface.depth, surface.block_height, surface.block_depth); +        } +        return static_cast<size_t>(surface.pitch * surface.height); +    }; +    const auto process_pitch_linear = [](bool unpack, std::span<const u8> input, +                                         std::span<u8> output, u32 extent_x, u32 extent_y, +                                         u32 pitch, u32 x0, u32 y0, size_t bpp) { +        const size_t base_offset = x0 * bpp; +        const size_t copy_size = extent_x * bpp; +        for (u32 y = y0; y < extent_y; y++) { +            const size_t first_offset = y * pitch + base_offset; +            const size_t second_offset = y * extent_x * bpp; +            u8* write_to = unpack ? &output[first_offset] : &output[second_offset]; +            const u8* read_from = unpack ? &input[second_offset] : &input[first_offset]; +            std::memcpy(write_to, read_from, copy_size); +        } +    }; + +    const u32 src_extent_x = config.src_x1 - config.src_x0; +    const u32 src_extent_y = config.src_y1 - config.src_y0; + +    const u32 dst_extent_x = config.dst_x1 - config.dst_x0; +    const u32 dst_extent_y = config.dst_y1 - config.dst_y0; +    const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); +    const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); +    const size_t src_size = get_surface_size(src, src_bytes_per_pixel); +    impl->tmp_buffer.resize(src_size); +    memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); + +    const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; + +    const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; + +    impl->src_buffer.resize(src_copy_size); + +    const bool no_passthrough = +        src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y; + +    const auto convertion_phase_same_format = [&]() { +        NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y, +                        dst_extent_x, dst_extent_y, dst_bytes_per_pixel); +    }; + +    const auto convertion_phase_ir = [&]() { +        auto* input_converter = impl->converter_factory.GetFormatConverter(src.format); +        impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components); +        impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components); +        input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src); + +        if (config.filter != Fermi2D::Filter::Bilinear) { +            NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x, +                                src_extent_y, dst_extent_x, dst_extent_y); +        } else { +            Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y, +                     dst_extent_x, dst_extent_y); +        } + +        auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format); +        output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer); +    }; + +    // Do actuall Blit + +    impl->dst_buffer.resize(dst_copy_size); +    if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { +        UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, +                         src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, +                         src_extent_y, src.block_height, src.block_depth, +                         src_extent_x * src_bytes_per_pixel); +    } else { +        process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, +                             src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); +    } + +    // Conversion Phase +    if (no_passthrough) { +        if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) { +            convertion_phase_ir(); +        } else { +            convertion_phase_same_format(); +        } +    } else { +        impl->dst_buffer.swap(impl->src_buffer); +    } + +    const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); +    impl->tmp_buffer.resize(dst_size); +    memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); + +    if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { +        SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, +                       dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, +                       dst_extent_y, dst.block_height, dst.block_depth, +                       dst_extent_x * dst_bytes_per_pixel); +    } else { +        process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, +                             dst.pitch, config.dst_x0, config.dst_y0, +                             static_cast<size_t>(dst_bytes_per_pixel)); +    } +    memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); +    return true; +} + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/blitter.h b/src/video_core/engines/sw_blitter/blitter.h new file mode 100644 index 000000000..85b55c836 --- /dev/null +++ b/src/video_core/engines/sw_blitter/blitter.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "video_core/engines/fermi_2d.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Blitter { + +class SoftwareBlitEngine { +public: +    explicit SoftwareBlitEngine(MemoryManager& memory_manager_); +    ~SoftwareBlitEngine(); + +    bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); + +private: +    MemoryManager& memory_manager; +    struct BlitEngineImpl; +    std::unique_ptr<BlitEngineImpl> impl; +}; + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/converter.cpp b/src/video_core/engines/sw_blitter/converter.cpp new file mode 100644 index 000000000..cd46dfd4f --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.cpp @@ -0,0 +1,1234 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <array> +#include <bit> +#include <cmath> +#include <span> +#include <unordered_map> + +#include "common/assert.h" +#include "video_core/engines/sw_blitter/converter.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else +#define FORCE_INLINE inline __attribute__((always_inline)) +#endif + +namespace Tegra::Engines::Blitter { + +enum class Swizzle : size_t { +    R = 0, +    G = 1, +    B = 2, +    A = 3, +    None, +}; + +enum class ComponentType : u32 { +    SNORM = 1, +    UNORM = 2, +    SINT = 3, +    UINT = 4, +    SNORM_FORCE_FP16 = 5, +    UNORM_FORCE_FP16 = 6, +    FLOAT = 7, +    SRGB = 8, +}; + +namespace { + +/* + * Note: Use generate_converters.py to generate the structs and searches for new render target + * formats and copy paste them to this file in order to update. just call "python + * generate_converters.py" and get the code from the output. modify the file to add new formats. + */ + +constexpr std::array<f32, 256> SRGB_TO_RGB_LUT = { +    0.000000e+00f, 3.035270e-04f, 6.070540e-04f, 9.105810e-04f, 1.214108e-03f, 1.517635e-03f, +    1.821162e-03f, 2.124689e-03f, 2.428216e-03f, 2.731743e-03f, 3.035270e-03f, 3.346536e-03f, +    3.676507e-03f, 4.024717e-03f, 4.391442e-03f, 4.776953e-03f, 5.181517e-03f, 5.605392e-03f, +    6.048833e-03f, 6.512091e-03f, 6.995410e-03f, 7.499032e-03f, 8.023193e-03f, 8.568126e-03f, +    9.134059e-03f, 9.721218e-03f, 1.032982e-02f, 1.096009e-02f, 1.161224e-02f, 1.228649e-02f, +    1.298303e-02f, 1.370208e-02f, 1.444384e-02f, 1.520851e-02f, 1.599629e-02f, 1.680738e-02f, +    1.764195e-02f, 1.850022e-02f, 1.938236e-02f, 2.028856e-02f, 2.121901e-02f, 2.217389e-02f, +    2.315337e-02f, 2.415763e-02f, 2.518686e-02f, 2.624122e-02f, 2.732089e-02f, 2.842604e-02f, +    2.955684e-02f, 3.071344e-02f, 3.189603e-02f, 3.310477e-02f, 3.433981e-02f, 3.560131e-02f, +    3.688945e-02f, 3.820437e-02f, 3.954624e-02f, 4.091520e-02f, 4.231141e-02f, 4.373503e-02f, +    4.518620e-02f, 4.666509e-02f, 4.817183e-02f, 4.970657e-02f, 5.126946e-02f, 5.286065e-02f, +    5.448028e-02f, 5.612849e-02f, 5.780543e-02f, 5.951124e-02f, 6.124605e-02f, 6.301001e-02f, +    6.480327e-02f, 6.662594e-02f, 6.847817e-02f, 7.036009e-02f, 7.227185e-02f, 7.421357e-02f, +    7.618538e-02f, 7.818742e-02f, 8.021982e-02f, 8.228271e-02f, 8.437621e-02f, 8.650046e-02f, +    8.865558e-02f, 9.084171e-02f, 9.305897e-02f, 9.530747e-02f, 9.758735e-02f, 9.989873e-02f, +    1.022417e-01f, 1.046165e-01f, 1.070231e-01f, 1.094617e-01f, 1.119324e-01f, 1.144354e-01f, +    1.169707e-01f, 1.195384e-01f, 1.221388e-01f, 1.247718e-01f, 1.274377e-01f, 1.301365e-01f, +    1.328683e-01f, 1.356333e-01f, 1.384316e-01f, 1.412633e-01f, 1.441285e-01f, 1.470273e-01f, +    1.499598e-01f, 1.529261e-01f, 1.559265e-01f, 1.589608e-01f, 1.620294e-01f, 1.651322e-01f, +    1.682694e-01f, 1.714411e-01f, 1.746474e-01f, 1.778884e-01f, 1.811642e-01f, 1.844750e-01f, +    1.878208e-01f, 1.912017e-01f, 1.946178e-01f, 1.980693e-01f, 2.015563e-01f, 2.050787e-01f, +    2.086369e-01f, 2.122308e-01f, 2.158605e-01f, 2.195262e-01f, 2.232280e-01f, 2.269659e-01f, +    2.307401e-01f, 2.345506e-01f, 2.383976e-01f, 2.422811e-01f, 2.462013e-01f, 2.501583e-01f, +    2.541521e-01f, 2.581829e-01f, 2.622507e-01f, 2.663556e-01f, 2.704978e-01f, 2.746773e-01f, +    2.788943e-01f, 2.831487e-01f, 2.874408e-01f, 2.917706e-01f, 2.961383e-01f, 3.005438e-01f, +    3.049873e-01f, 3.094689e-01f, 3.139887e-01f, 3.185468e-01f, 3.231432e-01f, 3.277781e-01f, +    3.324515e-01f, 3.371636e-01f, 3.419144e-01f, 3.467041e-01f, 3.515326e-01f, 3.564001e-01f, +    3.613068e-01f, 3.662526e-01f, 3.712377e-01f, 3.762621e-01f, 3.813260e-01f, 3.864294e-01f, +    3.915725e-01f, 3.967552e-01f, 4.019778e-01f, 4.072402e-01f, 4.125426e-01f, 4.178851e-01f, +    4.232677e-01f, 4.286905e-01f, 4.341536e-01f, 4.396572e-01f, 4.452012e-01f, 4.507858e-01f, +    4.564110e-01f, 4.620770e-01f, 4.677838e-01f, 4.735315e-01f, 4.793202e-01f, 4.851499e-01f, +    4.910209e-01f, 4.969330e-01f, 5.028865e-01f, 5.088813e-01f, 5.149177e-01f, 5.209956e-01f, +    5.271151e-01f, 5.332764e-01f, 5.394795e-01f, 5.457245e-01f, 5.520114e-01f, 5.583404e-01f, +    5.647115e-01f, 5.711249e-01f, 5.775805e-01f, 5.840784e-01f, 5.906188e-01f, 5.972018e-01f, +    6.038274e-01f, 6.104956e-01f, 6.172066e-01f, 6.239604e-01f, 6.307572e-01f, 6.375968e-01f, +    6.444797e-01f, 6.514056e-01f, 6.583748e-01f, 6.653873e-01f, 6.724432e-01f, 6.795425e-01f, +    6.866853e-01f, 6.938717e-01f, 7.011019e-01f, 7.083758e-01f, 7.156935e-01f, 7.230551e-01f, +    7.304608e-01f, 7.379104e-01f, 7.454042e-01f, 7.529422e-01f, 7.605245e-01f, 7.681512e-01f, +    7.758222e-01f, 7.835378e-01f, 7.912979e-01f, 7.991027e-01f, 8.069522e-01f, 8.148466e-01f, +    8.227857e-01f, 8.307699e-01f, 8.387990e-01f, 8.468732e-01f, 8.549926e-01f, 8.631572e-01f, +    8.713671e-01f, 8.796224e-01f, 8.879231e-01f, 8.962694e-01f, 9.046612e-01f, 9.130986e-01f, +    9.215819e-01f, 9.301109e-01f, 9.386857e-01f, 9.473065e-01f, 9.559733e-01f, 9.646863e-01f, +    9.734453e-01f, 9.822506e-01f, 9.911021e-01f, 1.000000e+00f}; + +constexpr std::array<f32, 256> RGB_TO_SRGB_LUT = { +    0.000000e+00f, 4.984009e-02f, 8.494473e-02f, 1.107021e-01f, 1.318038e-01f, 1.500052e-01f, +    1.661857e-01f, 1.808585e-01f, 1.943532e-01f, 2.068957e-01f, 2.186491e-01f, 2.297351e-01f, +    2.402475e-01f, 2.502604e-01f, 2.598334e-01f, 2.690152e-01f, 2.778465e-01f, 2.863614e-01f, +    2.945889e-01f, 3.025538e-01f, 3.102778e-01f, 3.177796e-01f, 3.250757e-01f, 3.321809e-01f, +    3.391081e-01f, 3.458689e-01f, 3.524737e-01f, 3.589320e-01f, 3.652521e-01f, 3.714419e-01f, +    3.775084e-01f, 3.834581e-01f, 3.892968e-01f, 3.950301e-01f, 4.006628e-01f, 4.061998e-01f, +    4.116451e-01f, 4.170030e-01f, 4.222770e-01f, 4.274707e-01f, 4.325873e-01f, 4.376298e-01f, +    4.426010e-01f, 4.475037e-01f, 4.523403e-01f, 4.571131e-01f, 4.618246e-01f, 4.664766e-01f, +    4.710712e-01f, 4.756104e-01f, 4.800958e-01f, 4.845292e-01f, 4.889122e-01f, 4.932462e-01f, +    4.975329e-01f, 5.017734e-01f, 5.059693e-01f, 5.101216e-01f, 5.142317e-01f, 5.183006e-01f, +    5.223295e-01f, 5.263194e-01f, 5.302714e-01f, 5.341862e-01f, 5.380651e-01f, 5.419087e-01f, +    5.457181e-01f, 5.494938e-01f, 5.532369e-01f, 5.569480e-01f, 5.606278e-01f, 5.642771e-01f, +    5.678965e-01f, 5.714868e-01f, 5.750484e-01f, 5.785821e-01f, 5.820884e-01f, 5.855680e-01f, +    5.890211e-01f, 5.924487e-01f, 5.958509e-01f, 5.992285e-01f, 6.025819e-01f, 6.059114e-01f, +    6.092176e-01f, 6.125010e-01f, 6.157619e-01f, 6.190008e-01f, 6.222180e-01f, 6.254140e-01f, +    6.285890e-01f, 6.317436e-01f, 6.348780e-01f, 6.379926e-01f, 6.410878e-01f, 6.441637e-01f, +    6.472208e-01f, 6.502595e-01f, 6.532799e-01f, 6.562824e-01f, 6.592672e-01f, 6.622347e-01f, +    6.651851e-01f, 6.681187e-01f, 6.710356e-01f, 6.739363e-01f, 6.768209e-01f, 6.796897e-01f, +    6.825429e-01f, 6.853807e-01f, 6.882034e-01f, 6.910111e-01f, 6.938041e-01f, 6.965826e-01f, +    6.993468e-01f, 7.020969e-01f, 7.048331e-01f, 7.075556e-01f, 7.102645e-01f, 7.129600e-01f, +    7.156424e-01f, 7.183118e-01f, 7.209683e-01f, 7.236121e-01f, 7.262435e-01f, 7.288625e-01f, +    7.314693e-01f, 7.340640e-01f, 7.366470e-01f, 7.392181e-01f, 7.417776e-01f, 7.443256e-01f, +    7.468624e-01f, 7.493880e-01f, 7.519025e-01f, 7.544061e-01f, 7.568989e-01f, 7.593810e-01f, +    7.618526e-01f, 7.643137e-01f, 7.667645e-01f, 7.692052e-01f, 7.716358e-01f, 7.740564e-01f, +    7.764671e-01f, 7.788681e-01f, 7.812595e-01f, 7.836413e-01f, 7.860138e-01f, 7.883768e-01f, +    7.907307e-01f, 7.930754e-01f, 7.954110e-01f, 7.977377e-01f, 8.000556e-01f, 8.023647e-01f, +    8.046651e-01f, 8.069569e-01f, 8.092403e-01f, 8.115152e-01f, 8.137818e-01f, 8.160402e-01f, +    8.182903e-01f, 8.205324e-01f, 8.227665e-01f, 8.249926e-01f, 8.272109e-01f, 8.294214e-01f, +    8.316242e-01f, 8.338194e-01f, 8.360070e-01f, 8.381871e-01f, 8.403597e-01f, 8.425251e-01f, +    8.446831e-01f, 8.468339e-01f, 8.489776e-01f, 8.511142e-01f, 8.532437e-01f, 8.553662e-01f, +    8.574819e-01f, 8.595907e-01f, 8.616927e-01f, 8.637881e-01f, 8.658767e-01f, 8.679587e-01f, +    8.700342e-01f, 8.721032e-01f, 8.741657e-01f, 8.762218e-01f, 8.782716e-01f, 8.803151e-01f, +    8.823524e-01f, 8.843835e-01f, 8.864085e-01f, 8.884274e-01f, 8.904402e-01f, 8.924471e-01f, +    8.944480e-01f, 8.964431e-01f, 8.984324e-01f, 9.004158e-01f, 9.023935e-01f, 9.043654e-01f, +    9.063318e-01f, 9.082925e-01f, 9.102476e-01f, 9.121972e-01f, 9.141413e-01f, 9.160800e-01f, +    9.180133e-01f, 9.199412e-01f, 9.218637e-01f, 9.237810e-01f, 9.256931e-01f, 9.276000e-01f, +    9.295017e-01f, 9.313982e-01f, 9.332896e-01f, 9.351761e-01f, 9.370575e-01f, 9.389339e-01f, +    9.408054e-01f, 9.426719e-01f, 9.445336e-01f, 9.463905e-01f, 9.482424e-01f, 9.500897e-01f, +    9.519322e-01f, 9.537700e-01f, 9.556032e-01f, 9.574316e-01f, 9.592555e-01f, 9.610748e-01f, +    9.628896e-01f, 9.646998e-01f, 9.665055e-01f, 9.683068e-01f, 9.701037e-01f, 9.718961e-01f, +    9.736842e-01f, 9.754679e-01f, 9.772474e-01f, 9.790225e-01f, 9.807934e-01f, 9.825601e-01f, +    9.843225e-01f, 9.860808e-01f, 9.878350e-01f, 9.895850e-01f, 9.913309e-01f, 9.930727e-01f, +    9.948106e-01f, 9.965444e-01f, 9.982741e-01f, 1.000000e+00f}; + +} // namespace + +struct R32G32B32A32_FLOATTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R32G32B32A32_SINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R32G32B32A32_UINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R32G32B32X32_FLOATTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct R32G32B32X32_SINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct R32G32B32X32_UINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32, 32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct R16G16B16A16_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R16G16B16A16_SNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R16G16B16A16_SINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R16G16B16A16_UINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R16G16B16A16_FLOATTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::A}; +}; + +struct R32G32_FLOATTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R32G32_SINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R32G32_UINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32, 32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16G16B16X16_FLOATTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16, 16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B, Swizzle::None}; +}; + +struct A8R8G8B8_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct A8R8G8B8_SRGBTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct A2B10G10R10_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A2B10G10R10_UINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A2R10G10B10_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {2, 10, 10, 10}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct A8B8G8R8_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A8B8G8R8_SRGBTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A8B8G8R8_SNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM, ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A8B8G8R8_SINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT, ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct A8B8G8R8_UINTTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT, ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R16G16_UNORMTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16G16_SNORMTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM, ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16G16_SINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16G16_UINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16G16_FLOATTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16, 16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct B10G11R11_FLOATTraits { +    static constexpr size_t num_components = 3; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT, ComponentType::FLOAT, ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {10, 11, 11}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct R32_SINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R32_UINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R32_FLOATTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {32}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct X8R8G8B8_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct X8R8G8B8_SRGBTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct R5G6B5_UNORMTraits { +    static constexpr size_t num_components = 3; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {5, 6, 5}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct A1R5G5B5_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::A, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct R8G8_UNORMTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R8G8_SNORMTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM, ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R8G8_SINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT, ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R8G8_UINTTraits { +    static constexpr size_t num_components = 2; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT, ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R, +                                                                              Swizzle::G}; +}; + +struct R16_UNORMTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R16_SNORMTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R16_SINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R16_UINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R16_FLOATTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::FLOAT}; +    static constexpr std::array<size_t, num_components> component_sizes = {16}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R8_UNORMTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R8_SNORMTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R8_SINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct R8_UINTTraits { +    static constexpr size_t num_components = 1; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UINT}; +    static constexpr std::array<size_t, num_components> component_sizes = {8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = {Swizzle::R}; +}; + +struct X1R5G5B5_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {1, 5, 5, 5}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::None, Swizzle::R, Swizzle::G, Swizzle::B}; +}; + +struct X8B8G8R8_UNORMTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM, ComponentType::UNORM}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +struct X8B8G8R8_SRGBTraits { +    static constexpr size_t num_components = 4; +    static constexpr std::array<ComponentType, num_components> component_types = { +        ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB, ComponentType::SRGB}; +    static constexpr std::array<size_t, num_components> component_sizes = {8, 8, 8, 8}; +    static constexpr std::array<Swizzle, num_components> component_swizzle = { +        Swizzle::None, Swizzle::B, Swizzle::G, Swizzle::R}; +}; + +template <class ConverterTraits> +class ConverterImpl : public Converter { +private: +    static constexpr size_t num_components = ConverterTraits::num_components; +    static constexpr std::array<ComponentType, num_components> component_types = +        ConverterTraits::component_types; +    static constexpr std::array<size_t, num_components> component_sizes = +        ConverterTraits::component_sizes; +    static constexpr std::array<Swizzle, num_components> component_swizzle = +        ConverterTraits::component_swizzle; + +    static constexpr size_t CalculateByteSize() { +        size_t size = 0; +        for (const size_t component_size : component_sizes) { +            size += component_size; +        } +        const size_t power = (sizeof(size_t) * 8) - std::countl_zero(size) - 1ULL; +        const size_t base_size = 1ULL << power; +        const size_t mask = base_size - 1ULL; +        return ((size & mask) != 0 ? base_size << 1ULL : base_size) / 8; +    } + +    static constexpr size_t total_bytes_per_pixel = CalculateByteSize(); +    static constexpr size_t total_words_per_pixel = +        (total_bytes_per_pixel + sizeof(u32) - 1U) / sizeof(u32); +    static constexpr size_t components_per_ir_rep = 4; + +    template <bool get_offsets> +    static constexpr std::array<size_t, num_components> GetBoundWordsOffsets() { +        std::array<size_t, num_components> result; +        result.fill(0); +        constexpr size_t total_bits_per_word = sizeof(u32) * 8; +        size_t accumulated_size = 0; +        size_t count = 0; +        for (size_t i = 0; i < num_components; i++) { +            if constexpr (get_offsets) { +                result[i] = accumulated_size; +            } else { +                result[i] = count; +            } +            accumulated_size += component_sizes[i]; +            if (accumulated_size > total_bits_per_word) { +                if constexpr (get_offsets) { +                    result[i] = 0; +                } else { +                    result[i]++; +                } +                count++; +                accumulated_size = component_sizes[i]; +            } +        } +        return result; +    } + +    static constexpr std::array<size_t, num_components> bound_words = GetBoundWordsOffsets<false>(); +    static constexpr std::array<size_t, num_components> bound_offsets = +        GetBoundWordsOffsets<true>(); + +    static constexpr std::array<u32, num_components> GetComponentsMask() { +        std::array<u32, num_components> result; +        for (size_t i = 0; i < num_components; i++) { +            result[i] = (((u32)~0) >> (8 * sizeof(u32) - component_sizes[i])) << bound_offsets[i]; +        } +        return result; +    } + +    static constexpr std::array<u32, num_components> component_mask = GetComponentsMask(); + +    // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function +    // calls, it may fail to detect the benefit of inlining. +    template <size_t which_component> +    FORCE_INLINE void ConvertToComponent(u32 which_word, f32& out_component) { +        const u32 value = (which_word >> bound_offsets[which_component]) & +                          static_cast<u32>((1ULL << component_sizes[which_component]) - 1ULL); +        const auto sign_extend = [](u32 base_value, size_t bits) { +            const size_t shift_amount = sizeof(u32) * 8 - bits; +            s32 shifted_value = static_cast<s32>(base_value << shift_amount); +            return shifted_value >> shift_amount; +        }; +        const auto force_to_fp16 = [](f32 base_value) { +            u32 tmp = std::bit_cast<u32>(base_value); +            constexpr size_t fp32_mantissa_bits = 23; +            constexpr size_t fp16_mantissa_bits = 10; +            constexpr size_t mantissa_mask = +                ~((1ULL << (fp32_mantissa_bits - fp16_mantissa_bits)) - 1ULL); +            tmp = tmp & static_cast<u32>(mantissa_mask); +            // TODO: force the exponent within the range of half float. Not needed in UNORM / SNORM +            return std::bit_cast<f32>(tmp); +        }; +        const auto from_fp_n = [&sign_extend](u32 base_value, size_t bits, size_t mantissa) { +            constexpr size_t fp32_mantissa_bits = 23; +            size_t shift_towards = fp32_mantissa_bits - mantissa; +            const u32 new_value = +                static_cast<u32>(sign_extend(base_value, bits) << shift_towards) & (~(1U << 31)); +            return std::bit_cast<f32>(new_value); +        }; +        const auto calculate_snorm = [&]() { +            return static_cast<f32>( +                static_cast<f32>(sign_extend(value, component_sizes[which_component])) / +                static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); +        }; +        const auto calculate_unorm = [&]() { +            return static_cast<f32>( +                static_cast<f32>(value) / +                static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL)); +        }; +        if constexpr (component_types[which_component] == ComponentType::SNORM) { +            out_component = calculate_snorm(); +        } else if constexpr (component_types[which_component] == ComponentType::UNORM) { +            out_component = calculate_unorm(); +        } else if constexpr (component_types[which_component] == ComponentType::SINT) { +            out_component = static_cast<f32>( +                static_cast<s32>(sign_extend(value, component_sizes[which_component]))); +        } else if constexpr (component_types[which_component] == ComponentType::UINT) { +            out_component = static_cast<f32>( +                static_cast<s32>(sign_extend(value, component_sizes[which_component]))); +        } else if constexpr (component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { +            out_component = calculate_snorm(); +            out_component = force_to_fp16(out_component); +        } else if constexpr (component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { +            out_component = calculate_unorm(); +            out_component = force_to_fp16(out_component); +        } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { +            if constexpr (component_sizes[which_component] == 32) { +                out_component = std::bit_cast<f32>(value); +            } else if constexpr (component_sizes[which_component] == 16) { +                static constexpr u32 sign_mask = 0x8000; +                static constexpr u32 mantissa_mask = 0x8000; +                out_component = std::bit_cast<f32>(((value & sign_mask) << 16) | +                                                   (((value & 0x7c00) + 0x1C000) << 13) | +                                                   ((value & mantissa_mask) << 13)); +            } else { +                out_component = from_fp_n(value, component_sizes[which_component], +                                          component_sizes[which_component] - 5); +            } +        } else if constexpr (component_types[which_component] == ComponentType::SRGB) { +            if constexpr (component_swizzle[which_component] == Swizzle::A) { +                out_component = calculate_unorm(); +            } else if constexpr (component_sizes[which_component] == 8) { +                out_component = SRGB_TO_RGB_LUT[value]; +            } else { +                out_component = calculate_unorm(); +                UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", +                                  component_sizes[which_component]); +            } +        } +    } + +    // We are forcing inline so the compiler can SIMD the conversations, since it may do 4 function +    // calls, it may fail to detect the benefit of inlining. +    template <size_t which_component> +    FORCE_INLINE void ConvertFromComponent(u32& which_word, f32 in_component) { +        const auto insert_to_word = [&]<typename T>(T new_word) { +            which_word |= (static_cast<u32>(new_word) << bound_offsets[which_component]) & +                          component_mask[which_component]; +        }; +        const auto to_fp_n = [](f32 base_value, size_t bits, size_t mantissa) { +            constexpr size_t fp32_mantissa_bits = 23; +            u32 tmp_value = std::bit_cast<u32>(std::max(base_value, 0.0f)); +            size_t shift_towards = fp32_mantissa_bits - mantissa; +            return tmp_value >> shift_towards; +        }; +        const auto calculate_unorm = [&]() { +            return static_cast<u32>( +                static_cast<f32>(in_component) * +                static_cast<f32>((1ULL << (component_sizes[which_component])) - 1ULL)); +        }; +        if constexpr (component_types[which_component] == ComponentType::SNORM || +                      component_types[which_component] == ComponentType::SNORM_FORCE_FP16) { +            s32 tmp_word = static_cast<s32>( +                static_cast<f32>(in_component) * +                static_cast<f32>((1ULL << (component_sizes[which_component] - 1ULL)) - 1ULL)); +            insert_to_word(tmp_word); + +        } else if constexpr (component_types[which_component] == ComponentType::UNORM || +                             component_types[which_component] == ComponentType::UNORM_FORCE_FP16) { +            u32 tmp_word = calculate_unorm(); +            insert_to_word(tmp_word); +        } else if constexpr (component_types[which_component] == ComponentType::SINT) { +            s32 tmp_word = static_cast<s32>(in_component); +            insert_to_word(tmp_word); +        } else if constexpr (component_types[which_component] == ComponentType::UINT) { +            u32 tmp_word = static_cast<u32>(in_component); +            insert_to_word(tmp_word); +        } else if constexpr (component_types[which_component] == ComponentType::FLOAT) { +            if constexpr (component_sizes[which_component] == 32) { +                u32 tmp_word = std::bit_cast<u32>(in_component); +                insert_to_word(tmp_word); +            } else if constexpr (component_sizes[which_component] == 16) { +                static constexpr u32 sign_mask = 0x8000; +                static constexpr u32 mantissa_mask = 0x03ff; +                static constexpr u32 exponent_mask = 0x7c00; +                const u32 tmp_word = std::bit_cast<u32>(in_component); +                const u32 half = ((tmp_word >> 16) & sign_mask) | +                                 ((((tmp_word & 0x7f800000) - 0x38000000) >> 13) & exponent_mask) | +                                 ((tmp_word >> 13) & mantissa_mask); +                insert_to_word(half); +            } else { +                insert_to_word(to_fp_n(in_component, component_sizes[which_component], +                                       component_sizes[which_component] - 5)); +            } +        } else if constexpr (component_types[which_component] == ComponentType::SRGB) { +            if constexpr (component_swizzle[which_component] != Swizzle::A) { +                if constexpr (component_sizes[which_component] == 8) { +                    const u32 index = calculate_unorm(); +                    in_component = RGB_TO_SRGB_LUT[index]; +                } else { +                    UNIMPLEMENTED_MSG("SRGB Conversion with component sizes of {} is unimplemented", +                                      component_sizes[which_component]); +                } +            } +            const u32 tmp_word = calculate_unorm(); +            insert_to_word(tmp_word); +        } +    } + +public: +    void ConvertTo(std::span<const u8> input, std::span<f32> output) override { +        const size_t num_pixels = output.size() / components_per_ir_rep; +        for (size_t pixel = 0; pixel < num_pixels; pixel++) { +            std::array<u32, total_words_per_pixel> words{}; + +            std::memcpy(words.data(), &input[pixel * total_bytes_per_pixel], total_bytes_per_pixel); +            std::span<f32> new_components(&output[pixel * components_per_ir_rep], +                                          components_per_ir_rep); +            if constexpr (component_swizzle[0] != Swizzle::None) { +                ConvertToComponent<0>(words[bound_words[0]], +                                      new_components[static_cast<size_t>(component_swizzle[0])]); +            } else { +                new_components[0] = 0.0f; +            } +            if constexpr (num_components >= 2) { +                if constexpr (component_swizzle[1] != Swizzle::None) { +                    ConvertToComponent<1>( +                        words[bound_words[1]], +                        new_components[static_cast<size_t>(component_swizzle[1])]); +                } else { +                    new_components[1] = 0.0f; +                } +            } else { +                new_components[1] = 0.0f; +            } +            if constexpr (num_components >= 3) { +                if constexpr (component_swizzle[2] != Swizzle::None) { +                    ConvertToComponent<2>( +                        words[bound_words[2]], +                        new_components[static_cast<size_t>(component_swizzle[2])]); +                } else { +                    new_components[2] = 0.0f; +                } +            } else { +                new_components[2] = 0.0f; +            } +            if constexpr (num_components >= 4) { +                if constexpr (component_swizzle[3] != Swizzle::None) { +                    ConvertToComponent<3>( +                        words[bound_words[3]], +                        new_components[static_cast<size_t>(component_swizzle[3])]); +                } else { +                    new_components[3] = 0.0f; +                } +            } else { +                new_components[3] = 0.0f; +            } +        } +    } + +    void ConvertFrom(std::span<const f32> input, std::span<u8> output) override { +        const size_t num_pixels = output.size() / total_bytes_per_pixel; +        for (size_t pixel = 0; pixel < num_pixels; pixel++) { +            std::span<const f32> old_components(&input[pixel * components_per_ir_rep], +                                                components_per_ir_rep); +            std::array<u32, total_words_per_pixel> words{}; +            if constexpr (component_swizzle[0] != Swizzle::None) { +                ConvertFromComponent<0>(words[bound_words[0]], +                                        old_components[static_cast<size_t>(component_swizzle[0])]); +            } +            if constexpr (num_components >= 2) { +                if constexpr (component_swizzle[1] != Swizzle::None) { +                    ConvertFromComponent<1>( +                        words[bound_words[1]], +                        old_components[static_cast<size_t>(component_swizzle[1])]); +                } +            } +            if constexpr (num_components >= 3) { +                if constexpr (component_swizzle[2] != Swizzle::None) { +                    ConvertFromComponent<2>( +                        words[bound_words[2]], +                        old_components[static_cast<size_t>(component_swizzle[2])]); +                } +            } +            if constexpr (num_components >= 4) { +                if constexpr (component_swizzle[3] != Swizzle::None) { +                    ConvertFromComponent<3>( +                        words[bound_words[3]], +                        old_components[static_cast<size_t>(component_swizzle[3])]); +                } +            } +            std::memcpy(&output[pixel * total_bytes_per_pixel], words.data(), +                        total_bytes_per_pixel); +        } +    } + +    ConverterImpl() = default; +    ~ConverterImpl() override = default; +}; + +struct ConverterFactory::ConverterFactoryImpl { +    std::unordered_map<RenderTargetFormat, std::unique_ptr<Converter>> converters_cache; +}; + +ConverterFactory::ConverterFactory() { +    impl = std::make_unique<ConverterFactoryImpl>(); +} + +ConverterFactory::~ConverterFactory() = default; + +Converter* ConverterFactory::GetFormatConverter(RenderTargetFormat format) { +    auto it = impl->converters_cache.find(format); +    if (it == impl->converters_cache.end()) [[unlikely]] { +        return BuildConverter(format); +    } +    return it->second.get(); +} + +class NullConverter : public Converter { +public: +    void ConvertTo([[maybe_unused]] std::span<const u8> input, std::span<f32> output) override { +        std::fill(output.begin(), output.end(), 0.0f); +    } +    void ConvertFrom([[maybe_unused]] std::span<const f32> input, std::span<u8> output) override { +        const u8 fill_value = 0U; +        std::fill(output.begin(), output.end(), fill_value); +    } +    NullConverter() = default; +    ~NullConverter() = default; +}; + +Converter* ConverterFactory::BuildConverter(RenderTargetFormat format) { +    switch (format) { +    case RenderTargetFormat::R32G32B32A32_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32B32A32_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32B32A32_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32A32_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32B32X32_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32B32X32_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32B32X32_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32B32X32_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16A16_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16A16_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16A16_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16A16_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16A16_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16A16_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32G32_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32G32_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16B16X16_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16B16X16_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8R8G8B8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8R8G8B8_SRGB: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8R8G8B8_SRGBTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A2B10G10R10_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A2B10G10R10_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A2B10G10R10_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A2R10G10B10_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A2R10G10B10_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8B8G8R8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8B8G8R8_SRGB: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SRGBTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8B8G8R8_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8B8G8R8_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A8B8G8R8_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A8B8G8R8_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16G16_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16G16_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::B10G11R11_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<B10G11R11_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R32_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R32_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::X8R8G8B8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::X8R8G8B8_SRGB: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<X8R8G8B8_SRGBTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R5G6B5_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R5G6B5_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::A1R5G5B5_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<A1R5G5B5_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8G8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8G8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8G8_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8G8_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8G8_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8G8_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8G8_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8G8_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R16_FLOAT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R16_FLOATTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8_SNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8_SNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8_SINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8_SINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::R8_UINT: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<R8_UINTTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::X1R5G5B5_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<X1R5G5B5_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::X8B8G8R8_UNORM: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_UNORMTraits>>()) +            .first->second.get(); +        break; +    case RenderTargetFormat::X8B8G8R8_SRGB: +        return impl->converters_cache +            .emplace(format, std::make_unique<ConverterImpl<X8B8G8R8_SRGBTraits>>()) +            .first->second.get(); +        break; +    default: { +        UNIMPLEMENTED_MSG("This format {} converter is not implemented", format); +        return impl->converters_cache.emplace(format, std::make_unique<NullConverter>()) +            .first->second.get(); +    } +    } +} + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/converter.h b/src/video_core/engines/sw_blitter/converter.h new file mode 100644 index 000000000..f9bdc516e --- /dev/null +++ b/src/video_core/engines/sw_blitter/converter.h @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <memory> +#include <span> + +#include "common/common_types.h" + +#include "video_core/gpu.h" + +namespace Tegra::Engines::Blitter { + +class Converter { +public: +    virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0; +    virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0; +    virtual ~Converter() = default; +}; + +class ConverterFactory { +public: +    ConverterFactory(); +    ~ConverterFactory(); + +    Converter* GetFormatConverter(RenderTargetFormat format); + +private: +    Converter* BuildConverter(RenderTargetFormat format); + +    struct ConverterFactoryImpl; +    std::unique_ptr<ConverterFactoryImpl> impl; +}; + +} // namespace Tegra::Engines::Blitter diff --git a/src/video_core/engines/sw_blitter/generate_converters.py b/src/video_core/engines/sw_blitter/generate_converters.py new file mode 100644 index 000000000..f641564f7 --- /dev/null +++ b/src/video_core/engines/sw_blitter/generate_converters.py @@ -0,0 +1,136 @@ +# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later + +import re + +class Format: +    def __init__(self, string_value): +        self.name = string_value +        tmp = string_value.split('_') +        self.component_type = tmp[1] +        component_data = re.findall(r"\w\d+", tmp[0]) +        self.num_components = len(component_data) +        sizes = [] +        swizzle = [] +        for data in component_data: +            swizzle.append(data[0]) +            sizes.append(int(data[1:])) +        self.sizes = sizes +        self.swizzle = swizzle + +    def build_component_type_array(self): +        result = "{ " +        b = False +        for i in range(0, self.num_components): +            if b: +                result += ", " +            b = True +            result += "ComponentType::" + self.component_type +        result += " }" +        return result + +    def build_component_sizes_array(self): +        result = "{ " +        b = False +        for i in range(0, self.num_components): +            if b: +                result += ", " +            b = True +            result += str(self.sizes[i]) +        result += " }" +        return result + +    def build_component_swizzle_array(self): +        result = "{ " +        b = False +        for i in range(0, self.num_components): +            if b: +                result += ", " +            b = True +            swizzle = self.swizzle[i] +            if swizzle == "X": +                swizzle = "None" +            result += "Swizzle::" + swizzle +        result += " }" +        return result + +    def print_declaration(self): +        print("struct " + self.name + "Traits {") +        print("  static constexpr size_t num_components = " + str(self.num_components) + ";") +        print("  static constexpr std::array<ComponentType, num_components> component_types = " + self.build_component_type_array() + ";") +        print("  static constexpr std::array<size_t, num_components> component_sizes = " + self.build_component_sizes_array() + ";") +        print("  static constexpr std::array<Swizzle, num_components> component_swizzle = " + self.build_component_swizzle_array() + ";") +        print("};\n") + +    def print_case(self): +        print("case RenderTargetFormat::" + self.name + ":") +        print("  return impl->converters_cache") +        print("    .emplace(format, std::make_unique<ConverterImpl<" + self.name + "Traits>>())") +        print("    .first->second.get();") +        print("  break;") + +txt = """ +R32G32B32A32_FLOAT +R32G32B32A32_SINT +R32G32B32A32_UINT +R32G32B32X32_FLOAT +R32G32B32X32_SINT +R32G32B32X32_UINT +R16G16B16A16_UNORM +R16G16B16A16_SNORM +R16G16B16A16_SINT +R16G16B16A16_UINT +R16G16B16A16_FLOAT +R32G32_FLOAT +R32G32_SINT +R32G32_UINT +R16G16B16X16_FLOAT +A8R8G8B8_UNORM +A8R8G8B8_SRGB +A2B10G10R10_UNORM +A2B10G10R10_UINT +A2R10G10B10_UNORM +A8B8G8R8_UNORM +A8B8G8R8_SRGB +A8B8G8R8_SNORM +A8B8G8R8_SINT +A8B8G8R8_UINT +R16G16_UNORM +R16G16_SNORM +R16G16_SINT +R16G16_UINT +R16G16_FLOAT +B10G11R11_FLOAT +R32_SINT +R32_UINT +R32_FLOAT +X8R8G8B8_UNORM +X8R8G8B8_SRGB +R5G6B5_UNORM +A1R5G5B5_UNORM +R8G8_UNORM +R8G8_SNORM +R8G8_SINT +R8G8_UINT +R16_UNORM +R16_SNORM +R16_SINT +R16_UINT +R16_FLOAT +R8_UNORM +R8_SNORM +R8_SINT +R8_UINT +X1R5G5B5_UNORM +X8B8G8R8_UNORM +X8B8G8R8_SRGB +""" + +x = txt.split() +y = list(map(lambda a: Format(a), x)) +formats = list(y) +for format in formats: +  format.print_declaration() + +for format in formats: +  format.print_case() diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d0709dc69..8a871593a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -27,12 +27,12 @@ struct CommandList;  // TODO: Implement the commented ones  enum class RenderTargetFormat : u32 {      NONE = 0x0, -    R32B32G32A32_FLOAT = 0xC0, +    R32G32B32A32_FLOAT = 0xC0,      R32G32B32A32_SINT = 0xC1,      R32G32B32A32_UINT = 0xC2, -    // R32G32B32X32_FLOAT = 0xC3, -    // R32G32B32X32_SINT = 0xC4, -    // R32G32B32X32_UINT = 0xC5, +    R32G32B32X32_FLOAT = 0xC3, +    R32G32B32X32_SINT = 0xC4, +    R32G32B32X32_UINT = 0xC5,      R16G16B16A16_UNORM = 0xC6,      R16G16B16A16_SNORM = 0xC7,      R16G16B16A16_SINT = 0xC8, @@ -56,13 +56,13 @@ enum class RenderTargetFormat : u32 {      R16G16_SINT = 0xDC,      R16G16_UINT = 0xDD,      R16G16_FLOAT = 0xDE, -    // A2R10G10B10_UNORM = 0xDF, +    A2R10G10B10_UNORM = 0xDF,      B10G11R11_FLOAT = 0xE0,      R32_SINT = 0xE3,      R32_UINT = 0xE4,      R32_FLOAT = 0xE5, -    // X8R8G8B8_UNORM = 0xE6, -    // X8R8G8B8_SRGB = 0xE7, +    X8R8G8B8_UNORM = 0xE6, +    X8R8G8B8_SRGB = 0xE7,      R5G6B5_UNORM = 0xE8,      A1R5G5B5_UNORM = 0xE9,      R8G8_UNORM = 0xEA, @@ -79,11 +79,11 @@ enum class RenderTargetFormat : u32 {      R8_SINT = 0xF5,      R8_UINT = 0xF6, -    /* -    A8_UNORM = 0xF7, +    // A8_UNORM = 0xF7,      X1R5G5B5_UNORM = 0xF8,      X8B8G8R8_UNORM = 0xF9,      X8B8G8R8_SRGB = 0xFA, +    /*      Z1R5G5B5_UNORM = 0xFB,      O1R5G5B5_UNORM = 0xFC,      Z8R8G8B8_UNORM = 0xFD, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 354c6e429..f71a316b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -466,8 +466,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf                                               const Tegra::Engines::Fermi2D::Config& copy_config) {      MICROPROFILE_SCOPE(OpenGL_Blits);      std::scoped_lock lock{texture_cache.mutex}; -    texture_cache.BlitImage(dst, src, copy_config); -    return true; +    return texture_cache.BlitImage(dst, src, copy_config);  }  Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index e14f9b2db..ef1190e1f 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -28,6 +28,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB      {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM      {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM      {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT +    {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2R10G10B10_UNORM      {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM      {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1},                 // A5B5G5R1_UNORM      {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index b4f5ee665..430a84272 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -125,6 +125,7 @@ struct FormatTuple {      {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1R5G5B5_UNORM      {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM      {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT +    {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM      {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},         // A1B5G5R5_UNORM (flipped with swizzle)      {VK_FORMAT_R5G5B5A1_UNORM_PACK16},                     // A5B5G5R1_UNORM (specially swizzled)      {VK_FORMAT_R8_UNORM, Attachable | Storage},            // R8_UNORM diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 12b13cc59..d8ad8815c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -542,8 +542,7 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf                                               const Tegra::Engines::Fermi2D::Surface& dst,                                               const Tegra::Engines::Fermi2D::Config& copy_config) {      std::scoped_lock lock{texture_cache.mutex}; -    texture_cache.BlitImage(dst, src, copy_config); -    return true; +    return texture_cache.BlitImage(dst, src, copy_config);  }  Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6bd133d10..b618e1a25 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -93,11 +93,14 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {  PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {      switch (format) { -    case Tegra::RenderTargetFormat::R32B32G32A32_FLOAT: +    case Tegra::RenderTargetFormat::R32G32B32A32_FLOAT: +    case Tegra::RenderTargetFormat::R32G32B32X32_FLOAT:          return PixelFormat::R32G32B32A32_FLOAT;      case Tegra::RenderTargetFormat::R32G32B32A32_SINT: +    case Tegra::RenderTargetFormat::R32G32B32X32_SINT:          return PixelFormat::R32G32B32A32_SINT;      case Tegra::RenderTargetFormat::R32G32B32A32_UINT: +    case Tegra::RenderTargetFormat::R32G32B32X32_UINT:          return PixelFormat::R32G32B32A32_UINT;      case Tegra::RenderTargetFormat::R16G16B16A16_UNORM:          return PixelFormat::R16G16B16A16_UNORM; @@ -118,16 +121,22 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)      case Tegra::RenderTargetFormat::R16G16B16X16_FLOAT:          return PixelFormat::R16G16B16X16_FLOAT;      case Tegra::RenderTargetFormat::A8R8G8B8_UNORM: +    case Tegra::RenderTargetFormat::X8R8G8B8_UNORM:          return PixelFormat::B8G8R8A8_UNORM;      case Tegra::RenderTargetFormat::A8R8G8B8_SRGB: +    case Tegra::RenderTargetFormat::X8R8G8B8_SRGB:          return PixelFormat::B8G8R8A8_SRGB;      case Tegra::RenderTargetFormat::A2B10G10R10_UNORM:          return PixelFormat::A2B10G10R10_UNORM;      case Tegra::RenderTargetFormat::A2B10G10R10_UINT:          return PixelFormat::A2B10G10R10_UINT; +    case Tegra::RenderTargetFormat::A2R10G10B10_UNORM: +        return PixelFormat::A2R10G10B10_UNORM;      case Tegra::RenderTargetFormat::A8B8G8R8_UNORM: +    case Tegra::RenderTargetFormat::X8B8G8R8_UNORM:          return PixelFormat::A8B8G8R8_UNORM;      case Tegra::RenderTargetFormat::A8B8G8R8_SRGB: +    case Tegra::RenderTargetFormat::X8B8G8R8_SRGB:          return PixelFormat::A8B8G8R8_SRGB;      case Tegra::RenderTargetFormat::A8B8G8R8_SNORM:          return PixelFormat::A8B8G8R8_SNORM; @@ -156,6 +165,7 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)      case Tegra::RenderTargetFormat::R5G6B5_UNORM:          return PixelFormat::R5G6B5_UNORM;      case Tegra::RenderTargetFormat::A1R5G5B5_UNORM: +    case Tegra::RenderTargetFormat::X1R5G5B5_UNORM:          return PixelFormat::A1R5G5B5_UNORM;      case Tegra::RenderTargetFormat::R8G8_UNORM:          return PixelFormat::R8G8_UNORM; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 57ca7f597..44b79af20 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -23,6 +23,7 @@ enum class PixelFormat {      A1R5G5B5_UNORM,      A2B10G10R10_UNORM,      A2B10G10R10_UINT, +    A2R10G10B10_UNORM,      A1B5G5R5_UNORM,      A5B5G5R1_UNORM,      R8_UNORM, @@ -159,6 +160,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{      1,  // A1R5G5B5_UNORM      1,  // A2B10G10R10_UNORM      1,  // A2B10G10R10_UINT +    1,  // A2R10G10B10_UNORM      1,  // A1B5G5R5_UNORM      1,  // A5B5G5R1_UNORM      1,  // R8_UNORM @@ -264,6 +266,7 @@ constexpr std::array<u8, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{      1,  // A1R5G5B5_UNORM      1,  // A2B10G10R10_UNORM      1,  // A2B10G10R10_UINT +    1,  // A2R10G10B10_UNORM      1,  // A1B5G5R5_UNORM      1,  // A5B5G5R1_UNORM      1,  // R8_UNORM @@ -369,6 +372,7 @@ constexpr std::array<u8, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{      16,  // A1R5G5B5_UNORM      32,  // A2B10G10R10_UNORM      32,  // A2B10G10R10_UINT +    32,  // A2R10G10B10_UNORM      16,  // A1B5G5R5_UNORM      16,  // A5B5G5R1_UNORM      8,   // R8_UNORM diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index acc854715..f1f0a057b 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -35,6 +35,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str                  return "A2B10G10R10_UNORM";              case PixelFormat::A2B10G10R10_UINT:                  return "A2B10G10R10_UINT"; +            case PixelFormat::A2R10G10B10_UNORM: +                return "A2R10G10B10_UNORM";              case PixelFormat::A1B5G5R5_UNORM:                  return "A1B5G5R5_UNORM";              case PixelFormat::A5B5G5R1_UNORM: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ef75fe73..8e68a2e53 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -506,10 +506,14 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz  }  template <class P> -void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, +bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,                                  const Tegra::Engines::Fermi2D::Surface& src,                                  const Tegra::Engines::Fermi2D::Config& copy) { -    const BlitImages images = GetBlitImages(dst, src, copy); +    const auto result = GetBlitImages(dst, src, copy); +    if (!result) { +        return false; +    } +    const BlitImages images = *result;      const ImageId dst_id = images.dst_id;      const ImageId src_id = images.src_id; @@ -596,6 +600,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,          runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,                            copy.operation);      } +    return true;  }  template <class P> @@ -1133,7 +1138,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA  }  template <class P> -typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( +std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImages(      const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,      const Tegra::Engines::Fermi2D::Config& copy) { @@ -1154,6 +1159,20 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(          has_deleted_images = false;          src_id = FindImage(src_info, src_addr, try_options);          dst_id = FindImage(dst_info, dst_addr, try_options); +        if (!copy.must_accelerate) { +            do { +                if (!src_id && !dst_id) { +                    return std::nullopt; +                } +                if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { +                    break; +                } +                if (dst_id && True(slot_images[dst_id].flags & ImageFlagBits::GpuModified)) { +                    break; +                } +                return std::nullopt; +            } while (false); +        }          const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;          if (src_image && src_image->info.num_samples > 1) {              RelaxedOptions find_options{FIND_OPTIONS | RelaxedOptions::ForceBrokenViews}; @@ -1194,12 +1213,12 @@ typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(              dst_id = FindOrInsertImage(dst_info, dst_addr, RelaxedOptions{});          } while (has_deleted_images);      } -    return BlitImages{ +    return {BlitImages{          .dst_id = dst_id,          .src_id = src_id,          .dst_format = dst_info.format,          .src_format = src_info.format, -    }; +    }};  }  template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2fa8445eb..9db7195bf 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -174,7 +174,7 @@ public:      void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);      /// Blit an image with the given parameters -    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, +    bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,                     const Tegra::Engines::Fermi2D::Surface& src,                     const Tegra::Engines::Fermi2D::Config& copy); @@ -285,9 +285,9 @@ private:      [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);      /// Return a blit image pair from the given guest blit parameters -    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, -                                           const Tegra::Engines::Fermi2D::Surface& src, -                                           const Tegra::Engines::Fermi2D::Config& copy); +    [[nodiscard]] std::optional<BlitImages> GetBlitImages( +        const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, +        const Tegra::Engines::Fermi2D::Config& copy);      /// Find or create a sampler from a guest descriptor sampler      [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index e670acc30..013ba0ceb 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -231,6 +231,9 @@     <property name="text">      <string>Con&figure...</string>     </property> +   <property name="menuRole"> +    <enum>QAction::PreferencesRole</enum> +   </property>    </action>    <action name="action_Display_Dock_Widget_Headers">     <property name="checkable"> @@ -363,6 +366,9 @@     <property name="text">      <string>&Configure TAS...</string>     </property> +   <property name="menuRole"> +    <enum>QAction::NoRole</enum> +   </property>    </action>    <action name="action_Configure_Current_Game">     <property name="enabled"> @@ -371,6 +377,9 @@     <property name="text">      <string>Configure C&urrent Game...</string>     </property> +   <property name="menuRole"> +    <enum>QAction::NoRole</enum> +   </property>    </action>    <action name="action_TAS_Start">     <property name="enabled"> | 
