diff options
Diffstat (limited to 'src')
24 files changed, 539 insertions, 167 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 264dff546..24b7a083c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -171,10 +171,12 @@ if(ARCHITECTURE_x86_64) PRIVATE x64/cpu_detect.cpp x64/cpu_detect.h + x64/xbyak_abi.h + x64/xbyak_util.h ) endif() create_target_directory_groups(common) target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) -target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) +target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h new file mode 100644 index 000000000..794da8a52 --- /dev/null +++ b/src/common/x64/xbyak_abi.h @@ -0,0 +1,266 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <bitset> +#include <initializer_list> +#include <xbyak.h> +#include "common/assert.h" + +namespace Common::X64 { + +inline int RegToIndex(const Xbyak::Reg& reg) { + using Kind = Xbyak::Reg::Kind; + ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, + "RegSet only support GPRs and XMM registers."); + ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15."); + return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); +} + +inline Xbyak::Reg64 IndexToReg64(int reg_index) { + ASSERT(reg_index < 16); + return Xbyak::Reg64(reg_index); +} + +inline Xbyak::Xmm IndexToXmm(int reg_index) { + ASSERT(reg_index >= 16 && reg_index < 32); + return Xbyak::Xmm(reg_index - 16); +} + +inline Xbyak::Reg IndexToReg(int reg_index) { + if (reg_index < 16) { + return IndexToReg64(reg_index); + } else { + return IndexToXmm(reg_index); + } +} + +inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { + std::bitset<32> bits; + for (const Xbyak::Reg& reg : regs) { + bits[RegToIndex(reg)] = true; + } + return bits; +} + +const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); +const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); + +#ifdef _WIN32 + +// Microsoft x64 ABI +const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; +const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; +const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; +const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; + +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rcx, + Xbyak::util::rdx, + Xbyak::util::r8, + Xbyak::util::r9, + Xbyak::util::r10, + Xbyak::util::r11, + // XMMs + Xbyak::util::xmm0, + Xbyak::util::xmm1, + Xbyak::util::xmm2, + Xbyak::util::xmm3, + Xbyak::util::xmm4, + Xbyak::util::xmm5, +}); + +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rbx, + Xbyak::util::rsi, + Xbyak::util::rdi, + Xbyak::util::rbp, + Xbyak::util::r12, + Xbyak::util::r13, + Xbyak::util::r14, + Xbyak::util::r15, + // XMMs + Xbyak::util::xmm6, + Xbyak::util::xmm7, + Xbyak::util::xmm8, + Xbyak::util::xmm9, + Xbyak::util::xmm10, + Xbyak::util::xmm11, + Xbyak::util::xmm12, + Xbyak::util::xmm13, + Xbyak::util::xmm14, + Xbyak::util::xmm15, +}); + +constexpr size_t ABI_SHADOW_SPACE = 0x20; + +#else + +// System V x86-64 ABI +const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; +const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; +const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; +const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; + +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rcx, + Xbyak::util::rdx, + Xbyak::util::rdi, + Xbyak::util::rsi, + Xbyak::util::r8, + Xbyak::util::r9, + Xbyak::util::r10, + Xbyak::util::r11, + // XMMs + Xbyak::util::xmm0, + Xbyak::util::xmm1, + Xbyak::util::xmm2, + Xbyak::util::xmm3, + Xbyak::util::xmm4, + Xbyak::util::xmm5, + Xbyak::util::xmm6, + Xbyak::util::xmm7, + Xbyak::util::xmm8, + Xbyak::util::xmm9, + Xbyak::util::xmm10, + Xbyak::util::xmm11, + Xbyak::util::xmm12, + Xbyak::util::xmm13, + Xbyak::util::xmm14, + Xbyak::util::xmm15, +}); + +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ + // GPRs + Xbyak::util::rbx, + Xbyak::util::rbp, + Xbyak::util::r12, + Xbyak::util::r13, + Xbyak::util::r14, + Xbyak::util::r15, +}); + +constexpr size_t ABI_SHADOW_SPACE = 0; + +#endif + +inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size, s32* out_subtraction, + s32* out_xmm_offset) { + const auto count = (regs & ABI_ALL_GPRS).count(); + rsp_alignment -= count * 8; + size_t subtraction = 0; + const auto xmm_count = (regs & ABI_ALL_XMMS).count(); + if (xmm_count) { + // If we have any XMMs to save, we must align the stack here. + subtraction = rsp_alignment & 0xF; + } + subtraction += 0x10 * xmm_count; + size_t xmm_base_subtraction = subtraction; + subtraction += needed_frame_size; + subtraction += ABI_SHADOW_SPACE; + // Final alignment. + rsp_alignment -= subtraction; + subtraction += rsp_alignment & 0xF; + + *out_subtraction = (s32)subtraction; + *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); +} + +inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(static_cast<int>(i))); + } + } + if (subtraction != 0) { + code.sub(code.rsp, subtraction); + } + + for (int i = 0; i < regs.count(); i++) { + if (regs.test(i) & ABI_ALL_GPRS.test(i)) { + code.push(IndexToReg64(i)); + } + } + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); + xmm_offset += 0x10; + } + } + + return ABI_SHADOW_SPACE; +} + +inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); + xmm_offset += 0x10; + } + } + + if (subtraction != 0) { + code.add(code.rsp, subtraction); + } + + // GPRs need to be popped in reverse order + for (int i = 15; i >= 0; i--) { + if (regs[i]) { + code.pop(IndexToReg64(i)); + } + } +} + +inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, + size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(static_cast<int>(i))); + } + } + + if (subtraction != 0) { + code.sub(code.rsp, subtraction); + } + + return ABI_SHADOW_SPACE; +} + +inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, + size_t rsp_alignment, size_t needed_frame_size = 0) { + s32 subtraction, xmm_offset; + ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + + if (subtraction != 0) { + code.add(code.rsp, subtraction); + } + + // GPRs need to be popped in reverse order + for (int i = 15; i >= 0; i--) { + if (regs[i]) { + code.pop(IndexToReg64(i)); + } + } +} + +} // namespace Common::X64 diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h new file mode 100644 index 000000000..df17f8cbe --- /dev/null +++ b/src/common/x64/xbyak_util.h @@ -0,0 +1,47 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <type_traits> +#include <xbyak.h> +#include "common/x64/xbyak_abi.h" + +namespace Common::X64 { + +// Constants for use with cmpps/cmpss +enum { + CMP_EQ = 0, + CMP_LT = 1, + CMP_LE = 2, + CMP_UNORD = 3, + CMP_NEQ = 4, + CMP_NLT = 5, + CMP_NLE = 6, + CMP_ORD = 7, +}; + +constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) { + const u64 distance = target - (ref + 5); + return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL); +} + +inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) { + return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target); +} + +template <typename T> +inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) { + static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer."); + size_t addr = reinterpret_cast<size_t>(f); + if (IsWithin2G(code, addr)) { + code.call(f); + } else { + // ABI_RETURN is a safe temp register to use before a call + code.mov(ABI_RETURN, addr); + code.call(ABI_RETURN); + } +} + +} // namespace Common::X64 diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f00c71dae..d6ee82836 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -229,7 +229,7 @@ endif() create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad) +target_link_libraries(video_core PRIVATE glad xbyak) if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d9a4a1b4d..b88fce2cd 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -56,24 +56,28 @@ public: if (use_fast_cbuf || size < max_stream_size) { if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { auto& memory_manager = system.GPU().MemoryManager(); + const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return ConstBufferUpload(host_ptr, size); + u8* dest; + if (is_granular) { + dest = memory_manager.GetPointer(gpu_addr); } else { staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return ConstBufferUpload(staging_buffer.data(), size); + dest = staging_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); } + return ConstBufferUpload(dest, size); + } + if (is_granular) { + u8* const host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { + std::memcpy(dest, host_ptr, size); + }); } else { - if (memory_manager.IsGranularRange(gpu_addr, size)) { - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - return StreamBufferUpload(host_ptr, size, alignment); - } else { - staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); - return StreamBufferUpload(staging_buffer.data(), size, alignment); - } + return StreamBufferUpload( + size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { + memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); + }); } } } @@ -101,7 +105,9 @@ public: BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4) { std::lock_guard lock{mutex}; - return StreamBufferUpload(raw_pointer, size, alignment); + return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { + std::memcpy(dest, raw_pointer, size); + }); } void Map(std::size_t max_size) { @@ -424,11 +430,11 @@ private: map->MarkAsModified(false, 0); } - BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, - std::size_t alignment) { + template <typename Callable> + BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { AlignBuffer(alignment); const std::size_t uploaded_offset = buffer_offset; - std::memcpy(buffer_ptr, raw_pointer, size); + callable(buffer_ptr); buffer_ptr += size; buffer_offset += size; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 024c9e43b..13ef2e42d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -106,7 +106,11 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; regs.framebuffer_srgb = 1; + regs.line_width_aliased = 1.0f; + regs.line_width_smooth = 1.0f; regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; + regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; + regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; shadow_state = regs; @@ -457,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. - ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, - "Units other than CROP are unimplemented"); + if (regs.query.query_get.unit != Regs::QueryUnit::Crop) { + LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented"); + } switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: @@ -534,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() { rasterizer.ResetCounter(QueryType::SamplesPassed); break; default: - LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", - static_cast<int>(regs.counter_reset)); + LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", + static_cast<int>(regs.counter_reset)); break; } } @@ -592,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { system.GPU().GetTicks()); return {}; default: - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); + LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); return 1; } } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 466a911db..e1b245288 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; - const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); @@ -182,7 +180,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_broken_compute = is_intel_proprietary; has_fast_buffer_sub_data = is_nvidia; use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5; @@ -206,7 +203,6 @@ Device::Device(std::nullptr_t) { has_image_load_formatted = true; has_variable_aoffi = true; has_component_indexing_bug = false; - has_broken_compute = false; has_precise_bug = false; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e915dbd86..683ed9002 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -80,10 +80,6 @@ public: return has_precise_bug; } - bool HasBrokenCompute() const { - return has_broken_compute; - } - bool HasFastBufferSubData() const { return has_fast_buffer_sub_data; } @@ -109,7 +105,6 @@ private: bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; - bool has_broken_compute{}; bool has_fast_buffer_sub_data{}; bool use_assembly_shaders{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 716d43e65..3c421dd16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -655,10 +655,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - if (device.HasBrokenCompute()) { - return; - } - buffer_cache.Acquire(); current_cbuf = 0; @@ -1024,6 +1020,26 @@ void RasterizerOpenGL::SyncViewport() { const auto& regs = gpu.regs; const bool dirty_viewport = flags[Dirty::Viewports]; + const bool dirty_clip_control = flags[Dirty::ClipControl]; + + if (dirty_clip_control || flags[Dirty::FrontFace]) { + flags[Dirty::FrontFace] = false; + + GLenum mode = MaxwellToGL::FrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0 && + regs.viewport_transform[0].scale_y < 0.0f) { + switch (mode) { + case GL_CW: + mode = GL_CCW; + break; + case GL_CCW: + mode = GL_CW; + break; + } + } + glFrontFace(mode); + } + if (dirty_viewport || flags[Dirty::ClipControl]) { flags[Dirty::ClipControl] = false; @@ -1121,11 +1137,6 @@ void RasterizerOpenGL::SyncCullMode() { glDisable(GL_CULL_FACE); } } - - if (flags[Dirty::FrontFace]) { - flags[Dirty::FrontFace] = false; - glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); - } } void RasterizerOpenGL::SyncPrimitiveRestart() { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 253484968..9cb115959 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2344,7 +2344,12 @@ private: return {}; } - Expression MemoryBarrierGL(Operation) { + Expression MemoryBarrierGroup(Operation) { + code.AddLine("groupMemoryBarrier();"); + return {}; + } + + Expression MemoryBarrierGlobal(Operation) { code.AddLine("memoryBarrier();"); return {}; } @@ -2591,7 +2596,8 @@ private: &GLSLDecompiler::ShuffleIndexed, &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGL, + &GLSLDecompiler::MemoryBarrierGroup, + &GLSLDecompiler::MemoryBarrierGlobal, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b489e6db..e7952924a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() { bool RendererOpenGL::Init() { if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); + if (Settings::values.renderer_debug) { + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + } glDebugMessageCallback(DebugHandler, nullptr); } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 568744e3c..424278816 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); u32 packed_front_face = PackFrontFace(regs.front_face); - if (regs.screen_y_control.triangle_rast_flip != 0 && - regs.viewport_transform[0].scale_y > 0.0f) { + if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face packed_front_face = 1 - packed_front_face; } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 2871035f5..62e950d31 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -149,7 +149,7 @@ struct FormatTuple { {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U {VK_FORMAT_UNDEFINED}, // R16S - {VK_FORMAT_UNDEFINED}, // R16UI + {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI {VK_FORMAT_UNDEFINED}, // R16I {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 750e5a0ca..9fd8ac3f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { - static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, - VK_FORMAT_A8B8G8R8_UINT_PACK32, - VK_FORMAT_A8B8G8R8_SNORM_PACK32, - VK_FORMAT_A8B8G8R8_SRGB_PACK32, - VK_FORMAT_B5G6R5_UNORM_PACK16, - VK_FORMAT_A2B10G10R10_UNORM_PACK32, - VK_FORMAT_A1R5G5B5_UNORM_PACK16, - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_R32G32B32A32_UINT, - VK_FORMAT_R32G32_SFLOAT, - VK_FORMAT_R32G32_UINT, - VK_FORMAT_R16G16B16A16_UINT, - VK_FORMAT_R16G16B16A16_SNORM, - VK_FORMAT_R16G16B16A16_UNORM, - VK_FORMAT_R16G16_UNORM, - VK_FORMAT_R16G16_SNORM, - VK_FORMAT_R16G16_SFLOAT, - VK_FORMAT_R16_UNORM, - VK_FORMAT_R8G8B8A8_SRGB, - VK_FORMAT_R8G8_UNORM, - VK_FORMAT_R8G8_SNORM, - VK_FORMAT_R8G8_UINT, - VK_FORMAT_R8_UNORM, - VK_FORMAT_R8_UINT, - VK_FORMAT_B10G11R11_UFLOAT_PACK32, - VK_FORMAT_R32_SFLOAT, - VK_FORMAT_R32_UINT, - VK_FORMAT_R32_SINT, - VK_FORMAT_R16_SFLOAT, - VK_FORMAT_R16G16B16A16_SFLOAT, - VK_FORMAT_B8G8R8A8_UNORM, - VK_FORMAT_B8G8R8A8_SRGB, - VK_FORMAT_R4G4B4A4_UNORM_PACK16, - VK_FORMAT_D32_SFLOAT, - VK_FORMAT_D16_UNORM, - VK_FORMAT_D16_UNORM_S8_UINT, - VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_D32_SFLOAT_S8_UINT, - VK_FORMAT_BC1_RGBA_UNORM_BLOCK, - VK_FORMAT_BC2_UNORM_BLOCK, - VK_FORMAT_BC3_UNORM_BLOCK, - VK_FORMAT_BC4_UNORM_BLOCK, - VK_FORMAT_BC5_UNORM_BLOCK, - VK_FORMAT_BC5_SNORM_BLOCK, - VK_FORMAT_BC7_UNORM_BLOCK, - VK_FORMAT_BC6H_UFLOAT_BLOCK, - VK_FORMAT_BC6H_SFLOAT_BLOCK, - VK_FORMAT_BC1_RGBA_SRGB_BLOCK, - VK_FORMAT_BC2_SRGB_BLOCK, - VK_FORMAT_BC3_SRGB_BLOCK, - VK_FORMAT_BC7_SRGB_BLOCK, - VK_FORMAT_ASTC_4x4_SRGB_BLOCK, - VK_FORMAT_ASTC_8x8_SRGB_BLOCK, - VK_FORMAT_ASTC_8x5_SRGB_BLOCK, - VK_FORMAT_ASTC_5x4_SRGB_BLOCK, - VK_FORMAT_ASTC_5x5_UNORM_BLOCK, - VK_FORMAT_ASTC_5x5_SRGB_BLOCK, - VK_FORMAT_ASTC_10x8_UNORM_BLOCK, - VK_FORMAT_ASTC_10x8_SRGB_BLOCK, - VK_FORMAT_ASTC_6x6_UNORM_BLOCK, - VK_FORMAT_ASTC_6x6_SRGB_BLOCK, - VK_FORMAT_ASTC_10x10_UNORM_BLOCK, - VK_FORMAT_ASTC_10x10_SRGB_BLOCK, - VK_FORMAT_ASTC_12x12_UNORM_BLOCK, - VK_FORMAT_ASTC_12x12_SRGB_BLOCK, - VK_FORMAT_ASTC_8x6_UNORM_BLOCK, - VK_FORMAT_ASTC_8x6_SRGB_BLOCK, - VK_FORMAT_ASTC_6x5_UNORM_BLOCK, - VK_FORMAT_ASTC_6x5_SRGB_BLOCK, - VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; + static constexpr std::array formats{ + VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VK_FORMAT_A8B8G8R8_UINT_PACK32, + VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32_UINT, + VK_FORMAT_R16G16B16A16_UINT, + VK_FORMAT_R16G16B16A16_SNORM, + VK_FORMAT_R16G16B16A16_UNORM, + VK_FORMAT_R16G16_UNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16_SFLOAT, + VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_UINT, + VK_FORMAT_R8G8B8A8_SRGB, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8_SNORM, + VK_FORMAT_R8G8_UINT, + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8_UINT, + VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R16_SFLOAT, + VK_FORMAT_R16G16B16A16_SFLOAT, + VK_FORMAT_B8G8R8A8_UNORM, + VK_FORMAT_B8G8R8A8_SRGB, + VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VK_FORMAT_D32_SFLOAT, + VK_FORMAT_D16_UNORM, + VK_FORMAT_D16_UNORM_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, + VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_UNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, + VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC7_UNORM_BLOCK, + VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC6H_SFLOAT_BLOCK, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC3_SRGB_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + }; std::unordered_map<VkFormat, VkFormatProperties> format_properties; for (const auto format : formats) { format_properties.emplace(format, physical.GetFormatProperties(format)); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..65a1c6245 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { ASSERT(point_size != 0.0f); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); + const auto& attribute = fixed_state.vertex_input.attributes[i]; + specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; + specialization.attribute_types[i] = attribute.Type(); } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index be5b77fae..a3d992ed3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& attrib = regs.vertex_attrib_format[index]; - if (!attrib.IsValid()) { + if (attrib.IsConstant()) { vertex_input.SetAttribute(index, false, 0, 0, {}, {}); continue; } - - [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; - ASSERT(buffer.IsEnabled()); - vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), attrib.size.Value()); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 890f34a2c..a13e8baa7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -741,8 +741,10 @@ private: if (!IsGenericAttribute(index)) { continue; } - const u32 location = GetGenericAttributeLocation(index); + if (!IsAttributeEnabled(location)) { + continue; + } const auto type_descriptor = GetAttributeType(location); Id type; if (IsInputAttributeArray()) { @@ -986,6 +988,10 @@ private: return stage == ShaderType::TesselationControl; } + bool IsAttributeEnabled(u32 location) const { + return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; + } + u32 GetNumInputVertices() const { switch (stage) { case ShaderType::Geometry: @@ -1201,16 +1207,20 @@ private: UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); return {v_float_zero, Type::Float}; default: - if (IsGenericAttribute(attribute)) { - const u32 location = GetGenericAttributeLocation(attribute); - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; + if (!IsGenericAttribute(attribute)) { + break; } - break; + const u32 location = GetGenericAttributeLocation(attribute); + if (!IsAttributeEnabled(location)) { + // Disabled attributes (also known as constant attributes) always return zero. + return {v_float_zero, Type::Float}; + } + const auto type_descriptor = GetAttributeType(location); + const Type type = type_descriptor.type; + const Id attribute_id = input_attributes.at(attribute); + const std::vector elements = {element}; + const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); + return {OpLoad(GetTypeDefinition(type), pointer), type}; } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); return {v_float_zero, Type::Float}; @@ -2215,8 +2225,8 @@ private: return {}; } - Expression MemoryBarrierGL(Operation) { - const auto scope = spv::Scope::Device; + template <spv::Scope scope> + Expression MemoryBarrier(Operation) { const auto semantics = spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | @@ -2681,7 +2691,8 @@ private: &SPIRVDecompiler::ShuffleIndexed, &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrierGL, + &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, + &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..b7af26388 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -88,7 +88,8 @@ struct Specialization final { u32 shared_memory_size{}; // Graphics specific - std::optional<float> point_size{}; + std::optional<float> point_size; + std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; bool ndc_minus_one_to_one{}; }; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 045f5abb5..c0a8f233f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + const OperationCode type = [instr] { + switch (instr.membar.type) { + case Tegra::Shader::MembarType::CTA: + return OperationCode::MemoryBarrierGroup; + case Tegra::Shader::MembarType::GL: + return OperationCode::MemoryBarrierGlobal; + default: + UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); + return OperationCode::MemoryBarrierGlobal; + } + }(); + bb.push_back(Operation(type)); break; } case OpCode::Id::DEPBAR: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c06512413..c5e5165ff 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -233,8 +233,9 @@ enum class OperationCode { ThreadLtMask, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint - Barrier, /// () -> void - MemoryBarrierGL, /// () -> void + Barrier, /// () -> void + MemoryBarrierGroup, /// () -> void + MemoryBarrierGlobal, /// () -> void Amount, }; diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 77> DefinitionTable = {{ +constexpr std::array<Table, 78> DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, + {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..45e3ddd2c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include <unordered_map> #include <vector> +#include <boost/container/small_vector.hpp> #include <boost/icl/interval_map.hpp> #include <boost/range/iterator_range.hpp> @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { + using VectorSurface = boost::container::small_vector<TSurface, 1>; public: void InvalidateRegion(VAddr addr, std::size_t size) { @@ -308,18 +310,20 @@ public: dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(VAddr addr) { + TSurface TryFindFramebufferSurface(VAddr addr) const { if (!addr) { return nullptr; } const VAddr page = addr >> registry_page_bits; - std::vector<TSurface>& list = registry[page]; - for (auto& surface : list) { - if (surface->GetCpuAddr() == addr) { - return surface; - } + const auto it = registry.find(page); + if (it == registry.end()) { + return nullptr; } - return nullptr; + const auto& list = it->second; + const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { + return surface->GetCpuAddr() == addr; + }); + return found != list.end() ? *found : nullptr; } u64 Tick() { @@ -498,7 +502,7 @@ private: * @param untopological Indicates to the recycler that the texture has no way * to match the overlaps due to topological reasons. **/ - RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, + RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::IsGPULevelExtreme()) { return RecycleStrategy::Flush; @@ -538,9 +542,8 @@ private: * @param untopological Indicates to the recycler that the texture has no way to match the * overlaps due to topological reasons. **/ - std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, - const SurfaceParams& params, const GPUVAddr gpu_addr, - const bool preserve_contents, + std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); for (auto& surface : overlaps) { @@ -650,7 +653,7 @@ private: * @param params The parameters on the new surface. * @param gpu_addr The starting address of the new surface. **/ - std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { @@ -708,7 +711,7 @@ private: * @param preserve_contents Indicates that the new surface should be loaded from memory or * left blank. */ - std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, + std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const VAddr cpu_addr, @@ -810,7 +813,7 @@ private: TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - std::vector<TSurface> overlaps{current_surface}; + VectorSurface overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } @@ -1126,23 +1129,25 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { + VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } const VAddr cpu_addr_end = cpu_addr + size; - VAddr start = cpu_addr >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; - std::vector<TSurface> surfaces; - while (start <= end) { - std::vector<TSurface>& list = registry[start]; - for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { - surface->MarkAsPicked(true); - surfaces.push_back(surface); + VectorSurface surfaces; + for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { + const auto it = registry.find(start); + if (it == registry.end()) { + continue; + } + for (auto& surface : it->second) { + if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { + continue; } + surface->MarkAsPicked(true); + surfaces.push_back(surface); } - start++; } for (auto& surface : surfaces) { surface->MarkAsPicked(false); diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 1adf8932b..1f5e43043 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -106,6 +106,9 @@ public: format.setVersion(4, 3); format.setProfile(QSurfaceFormat::CompatibilityProfile); format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); + if (Settings::values.renderer_debug) { + format.setOption(QSurfaceFormat::FormatOption::DebugContext); + } // TODO: expose a setting for buffer value (ie default/single/double/triple) format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); format.setSwapInterval(0); diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 411e7e647..09cc0a3b5 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); + if (Settings::values.renderer_debug) { + SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); + } SDL_GL_SetSwapInterval(0); std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, |