summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt4
-rw-r--r--src/audio_core/audio_renderer.cpp118
-rw-r--r--src/audio_core/audio_renderer.h23
-rw-r--r--src/audio_core/common.h1
-rw-r--r--src/audio_core/stream.cpp25
-rw-r--r--src/audio_core/stream.h5
-rw-r--r--src/common/CMakeLists.txt23
-rw-r--r--src/common/atomic_ops.cpp70
-rw-r--r--src/common/atomic_ops.h17
-rw-r--r--src/common/fiber.cpp222
-rw-r--r--src/common/fiber.h92
-rw-r--r--src/common/memory_detect.cpp60
-rw-r--r--src/common/memory_detect.h22
-rw-r--r--src/common/spin_lock.cpp54
-rw-r--r--src/common/spin_lock.h26
-rw-r--r--src/common/telemetry.cpp1
-rw-r--r--src/common/thread.cpp52
-rw-r--r--src/common/thread.h13
-rw-r--r--src/common/time_zone.cpp49
-rw-r--r--src/common/time_zone.h18
-rw-r--r--src/common/uint128.cpp26
-rw-r--r--src/common/uint128.h3
-rw-r--r--src/common/wall_clock.cpp91
-rw-r--r--src/common/wall_clock.h53
-rw-r--r--src/common/x64/cpu_detect.cpp38
-rw-r--r--src/common/x64/cpu_detect.h13
-rw-r--r--src/common/x64/native_clock.cpp103
-rw-r--r--src/common/x64/native_clock.h48
-rw-r--r--src/common/x64/xbyak_abi.h229
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/CMakeLists.txt16
-rw-r--r--src/core/arm/arm_interface.cpp57
-rw-r--r--src/core/arm/arm_interface.h20
-rw-r--r--src/core/arm/cpu_interrupt_handler.cpp29
-rw-r--r--src/core/arm/cpu_interrupt_handler.h39
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp103
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h12
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp110
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.h26
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp81
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h126
-rw-r--r--src/core/arm/dynarmic/arm_exclusive_monitor.cpp76
-rw-r--r--src/core/arm/dynarmic/arm_exclusive_monitor.h48
-rw-r--r--src/core/arm/exclusive_monitor.cpp2
-rw-r--r--src/core/arm/exclusive_monitor.h6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp19
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h5
-rw-r--r--src/core/core.cpp128
-rw-r--r--src/core/core.h48
-rw-r--r--src/core/core_manager.cpp67
-rw-r--r--src/core/core_manager.h63
-rw-r--r--src/core/core_timing.cpp256
-rw-r--r--src/core/core_timing.h123
-rw-r--r--src/core/core_timing_util.cpp44
-rw-r--r--src/core/core_timing_util.h18
-rw-r--r--src/core/cpu_manager.cpp368
-rw-r--r--src/core/cpu_manager.h80
-rw-r--r--src/core/file_sys/control_metadata.cpp4
-rw-r--r--src/core/file_sys/control_metadata.h1
-rw-r--r--src/core/file_sys/patch_manager.cpp34
-rw-r--r--src/core/file_sys/patch_manager.h5
-rw-r--r--src/core/file_sys/savedata_factory.cpp9
-rw-r--r--src/core/file_sys/system_archive/mii_model.cpp2
-rw-r--r--src/core/file_sys/system_archive/shared_font.cpp2
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp14
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/framebuffer_layout.h5
-rw-r--r--src/core/gdbstub/gdbstub.cpp1
-rw-r--r--src/core/hardware_properties.h4
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp212
-rw-r--r--src/core/hle/kernel/address_arbiter.h3
-rw-r--r--src/core/hle/kernel/client_port.cpp2
-rw-r--r--src/core/hle/kernel/errors.h1
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp100
-rw-r--r--src/core/hle/kernel/hle_ipc.h11
-rw-r--r--src/core/hle/kernel/kernel.cpp255
-rw-r--r--src/core/hle/kernel/kernel.h39
-rw-r--r--src/core/hle/kernel/memory/memory_manager.cpp5
-rw-r--r--src/core/hle/kernel/mutex.cpp118
-rw-r--r--src/core/hle/kernel/mutex.h4
-rw-r--r--src/core/hle/kernel/physical_core.cpp52
-rw-r--r--src/core/hle/kernel/physical_core.h44
-rw-r--r--src/core/hle/kernel/process.cpp29
-rw-r--r--src/core/hle/kernel/readable_event.cpp5
-rw-r--r--src/core/hle/kernel/resource_limit.cpp6
-rw-r--r--src/core/hle/kernel/scheduler.cpp576
-rw-r--r--src/core/hle/kernel/scheduler.h123
-rw-r--r--src/core/hle/kernel/server_session.cpp20
-rw-r--r--src/core/hle/kernel/svc.cpp464
-rw-r--r--src/core/hle/kernel/svc_wrap.h137
-rw-r--r--src/core/hle/kernel/synchronization.cpp137
-rw-r--r--src/core/hle/kernel/synchronization_object.cpp64
-rw-r--r--src/core/hle/kernel/synchronization_object.h18
-rw-r--r--src/core/hle/kernel/thread.cpp424
-rw-r--r--src/core/hle/kernel/thread.h277
-rw-r--r--src/core/hle/kernel/time_manager.cpp23
-rw-r--r--src/core/hle/kernel/time_manager.h4
-rw-r--r--src/core/hle/service/acc/acc.cpp341
-rw-r--r--src/core/hle/service/acc/acc_aa.cpp4
-rw-r--r--src/core/hle/service/acc/acc_su.cpp34
-rw-r--r--src/core/hle/service/acc/acc_u0.cpp18
-rw-r--r--src/core/hle/service/acc/acc_u1.cpp29
-rw-r--r--src/core/hle/service/am/am.cpp18
-rw-r--r--src/core/hle/service/am/am.h2
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp6
-rw-r--r--src/core/hle/service/am/spsm.cpp16
-rw-r--r--src/core/hle/service/aoc/aoc_u.cpp1
-rw-r--r--src/core/hle/service/bcat/backend/boxcat.cpp2
-rw-r--r--src/core/hle/service/bcat/bcat.cpp2
-rw-r--r--src/core/hle/service/bcat/module.cpp3
-rw-r--r--src/core/hle/service/bpc/bpc.cpp20
-rw-r--r--src/core/hle/service/btdrv/btdrv.cpp167
-rw-r--r--src/core/hle/service/btm/btm.cpp147
-rw-r--r--src/core/hle/service/caps/caps.cpp2
-rw-r--r--src/core/hle/service/caps/caps.h76
-rw-r--r--src/core/hle/service/caps/caps_a.cpp2
-rw-r--r--src/core/hle/service/caps/caps_a.h2
-rw-r--r--src/core/hle/service/caps/caps_c.cpp2
-rw-r--r--src/core/hle/service/caps/caps_c.h2
-rw-r--r--src/core/hle/service/caps/caps_sc.cpp2
-rw-r--r--src/core/hle/service/caps/caps_sc.h2
-rw-r--r--src/core/hle/service/caps/caps_ss.cpp2
-rw-r--r--src/core/hle/service/caps/caps_ss.h2
-rw-r--r--src/core/hle/service/caps/caps_su.cpp2
-rw-r--r--src/core/hle/service/caps/caps_su.h2
-rw-r--r--src/core/hle/service/caps/caps_u.cpp26
-rw-r--r--src/core/hle/service/caps/caps_u.h2
-rw-r--r--src/core/hle/service/es/es.cpp47
-rw-r--r--src/core/hle/service/eupld/eupld.cpp1
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp38
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h1
-rw-r--r--src/core/hle/service/friend/friend.cpp6
-rw-r--r--src/core/hle/service/grc/grc.cpp3
-rw-r--r--src/core/hle/service/hid/controllers/debug_pad.cpp2
-rw-r--r--src/core/hle/service/hid/controllers/gesture.cpp2
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp9
-rw-r--r--src/core/hle/service/hid/controllers/mouse.cpp2
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp10
-rw-r--r--src/core/hle/service/hid/controllers/npad.h10
-rw-r--r--src/core/hle/service/hid/controllers/stubbed.cpp2
-rw-r--r--src/core/hle/service/hid/controllers/touchscreen.cpp4
-rw-r--r--src/core/hle/service/hid/controllers/xpad.cpp2
-rw-r--r--src/core/hle/service/hid/hid.cpp169
-rw-r--r--src/core/hle/service/hid/hid.h15
-rw-r--r--src/core/hle/service/hid/irs.cpp2
-rw-r--r--src/core/hle/service/lbl/lbl.cpp1
-rw-r--r--src/core/hle/service/ldn/ldn.cpp1
-rw-r--r--src/core/hle/service/ldr/ldr.cpp105
-rw-r--r--src/core/hle/service/lm/manager.cpp3
-rw-r--r--src/core/hle/service/mig/mig.cpp6
-rw-r--r--src/core/hle/service/mm/mm_u.cpp32
-rw-r--r--src/core/hle/service/ncm/ncm.cpp20
-rw-r--r--src/core/hle/service/nfc/nfc.cpp6
-rw-r--r--src/core/hle/service/nifm/nifm.cpp3
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp28
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h18
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp4
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h10
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp64
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h21
-rw-r--r--src/core/hle/service/prepo/prepo.cpp7
-rw-r--r--src/core/hle/service/sm/sm.cpp2
-rw-r--r--src/core/hle/service/time/standard_steady_clock_core.cpp5
-rw-r--r--src/core/hle/service/time/tick_based_steady_clock_core.cpp5
-rw-r--r--src/core/hle/service/time/time.cpp5
-rw-r--r--src/core/hle/service/time/time_manager.cpp11
-rw-r--r--src/core/hle/service/time/time_sharedmemory.cpp3
-rw-r--r--src/core/hle/service/time/time_zone_content_manager.cpp24
-rw-r--r--src/core/hle/service/vi/vi.cpp2
-rw-r--r--src/core/memory.cpp109
-rw-r--r--src/core/memory.h67
-rw-r--r--src/core/memory/cheat_engine.cpp8
-rw-r--r--src/core/perf_stats.cpp2
-rw-r--r--src/core/reporter.cpp3
-rw-r--r--src/core/settings.cpp24
-rw-r--r--src/core/settings.h11
-rw-r--r--src/core/telemetry_session.cpp1
-rw-r--r--src/core/tools/freezer.cpp8
-rw-r--r--src/input_common/keyboard.cpp2
-rw-r--r--src/input_common/motion_emu.cpp2
-rw-r--r--src/tests/CMakeLists.txt1
-rw-r--r--src/tests/common/fibers.cpp358
-rw-r--r--src/tests/core/core_timing.cpp182
-rw-r--r--src/video_core/CMakeLists.txt20
-rw-r--r--src/video_core/buffer_cache/buffer_block.h27
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h481
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h133
-rw-r--r--src/video_core/compatible_formats.cpp162
-rw-r--r--src/video_core/compatible_formats.h32
-rw-r--r--src/video_core/dma_pusher.cpp9
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp5
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp45
-rw-r--r--src/video_core/engines/maxwell_3d.h47
-rw-r--r--src/video_core/engines/shader_bytecode.h36
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h7
-rw-r--r--src/video_core/gpu_asynch.cpp9
-rw-r--r--src/video_core/gpu_asynch.h2
-rw-r--r--src/video_core/gpu_synch.cpp8
-rw-r--r--src/video_core/gpu_synch.h2
-rw-r--r--src/video_core/gpu_thread.cpp7
-rw-r--r--src/video_core/macro/macro.cpp91
-rw-r--r--src/video_core/macro/macro.h141
-rw-r--r--src/video_core/macro/macro_hle.cpp113
-rw-r--r--src/video_core/macro/macro_hle.h44
-rw-r--r--src/video_core/macro/macro_interpreter.cpp (renamed from src/video_core/macro_interpreter.cpp)199
-rw-r--r--src/video_core/macro/macro_interpreter.h (renamed from src/video_core/macro_interpreter.h)51
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp621
-rw-r--r--src/video_core/macro/macro_jit_x64.h98
-rw-r--r--src/video_core/memory_manager.cpp40
-rw-r--r--src/video_core/memory_manager.h12
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/query_cache.h10
-rw-r--r--src/video_core/rasterizer_cache.cpp7
-rw-r--r--src/video_core/rasterizer_cache.h197
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2073
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h49
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp123
-rw-r--r--src/video_core/renderer_opengl/gl_device.h32
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp395
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h41
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp192
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h65
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp316
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h56
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h25
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp124
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h32
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h119
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp36
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h8
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp12
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h8
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp202
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h2
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp32
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp98
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp173
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp34
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp93
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp191
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h23
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp19
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp248
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp109
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp36
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h32
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp19
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h6
-rw-r--r--src/video_core/shader/decode.cpp2
-rw-r--r--src/video_core/shader/decode/half_set.cpp88
-rw-r--r--src/video_core/shader/decode/image.cpp26
-rw-r--r--src/video_core/shader/decode/memory.cpp3
-rw-r--r--src/video_core/shader/decode/other.cpp42
-rw-r--r--src/video_core/shader/decode/texture.cpp55
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/memory_util.cpp4
-rw-r--r--src/video_core/shader/node.h105
-rw-r--r--src/video_core/shader/node_helper.h2
-rw-r--r--src/video_core/shader/registry.cpp20
-rw-r--r--src/video_core/shader/registry.h35
-rw-r--r--src/video_core/shader/shader_ir.cpp109
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp78
-rw-r--r--src/video_core/shader_cache.h228
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h79
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp4
-rw-r--r--src/video_core/texture_cache/surface_base.cpp10
-rw-r--r--src/video_core/texture_cache/surface_base.h13
-rw-r--r--src/video_core/texture_cache/surface_params.cpp19
-rw-r--r--src/video_core/texture_cache/texture_cache.h270
-rw-r--r--src/web_service/CMakeLists.txt2
-rw-r--r--src/web_service/telemetry_json.cpp2
-rw-r--r--src/web_service/verify_login.cpp2
-rw-r--r--src/yuzu/CMakeLists.txt4
-rw-r--r--src/yuzu/bootmanager.cpp90
-rw-r--r--src/yuzu/bootmanager.h8
-rw-r--r--src/yuzu/configuration/config.cpp23
-rw-r--r--src/yuzu/configuration/config.h2
-rw-r--r--src/yuzu/configuration/configure_debug.cpp3
-rw-r--r--src/yuzu/configuration/configure_debug.ui13
-rw-r--r--src/yuzu/configuration/configure_general.cpp6
-rw-r--r--src/yuzu/configuration/configure_general.ui7
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp45
-rw-r--r--src/yuzu/configuration/configure_graphics.ui40
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp3
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp3
-rw-r--r--src/yuzu/configuration/configure_service.cpp6
-rw-r--r--src/yuzu/configuration/configure_system.cpp2
-rw-r--r--src/yuzu/configuration/configure_system.h1
-rw-r--r--src/yuzu/configuration/configure_system.ui257
-rw-r--r--src/yuzu/debugger/wait_tree.cpp54
-rw-r--r--src/yuzu/discord_impl.cpp2
-rw-r--r--src/yuzu/game_list.cpp8
-rw-r--r--src/yuzu/game_list.h2
-rw-r--r--src/yuzu/loading_screen.cpp3
-rw-r--r--src/yuzu/main.cpp214
-rw-r--r--src/yuzu/main.h11
-rw-r--r--src/yuzu/main.ui30
-rw-r--r--src/yuzu/yuzu.rc2
-rw-r--r--src/yuzu_cmd/config.cpp11
-rw-r--r--src/yuzu_cmd/default_ini.h15
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
-rw-r--r--src/yuzu_cmd/yuzu.cpp5
-rw-r--r--src/yuzu_cmd/yuzu.rc2
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h5
-rw-r--r--src/yuzu_tester/service/yuzutest.cpp2
-rw-r--r--src/yuzu_tester/yuzu.cpp5
-rw-r--r--src/yuzu_tester/yuzu.rc2
336 files changed, 14590 insertions, 4954 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3a57356ab..1e977e8a8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -62,6 +62,10 @@ else()
-Wno-unused-parameter
)
+ if (ARCHITECTURE_x86_64)
+ add_compile_options("-mcx16")
+ endif()
+
if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang)
add_compile_options("-stdlib=libc++")
endif()
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index d18ef6940..d64452617 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -17,7 +17,7 @@ namespace AudioCore {
constexpr u32 STREAM_SAMPLE_RATE{48000};
constexpr u32 STREAM_NUM_CHANNELS{2};
-
+using VoiceChannelHolder = std::array<VoiceResourceInformation*, 6>;
class AudioRenderer::VoiceState {
public:
bool IsPlaying() const {
@@ -37,9 +37,10 @@ public:
}
void SetWaveIndex(std::size_t index);
- std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory);
+ std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources);
void UpdateState();
- void RefreshBuffer(Core::Memory::Memory& memory);
+ void RefreshBuffer(Core::Memory::Memory& memory, const VoiceChannelHolder& voice_resources);
private:
bool is_in_use{};
@@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
std::shared_ptr<Kernel::WritableEvent> buffer_event,
std::size_t instance_number)
: worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
- effects(params.effect_count), memory{memory_} {
+ voice_resources(params.voice_count), effects(params.effect_count), memory{memory_} {
behavior_info.SetUserRevision(params.revision);
audio_out = std::make_unique<AudioCore::AudioOut>();
stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
@@ -127,6 +128,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
input_params.data() + sizeof(UpdateDataHeader) + config.behavior_size,
memory_pool_count * sizeof(MemoryPoolInfo));
+ // Copy voice resources
+ const std::size_t voice_resource_offset{sizeof(UpdateDataHeader) + config.behavior_size +
+ config.memory_pools_size};
+ std::memcpy(voice_resources.data(), input_params.data() + voice_resource_offset,
+ sizeof(VoiceResourceInformation) * voice_resources.size());
+
// Copy VoiceInfo structs
std::size_t voice_offset{sizeof(UpdateDataHeader) + config.behavior_size +
config.memory_pools_size + config.voice_resource_size};
@@ -173,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
// Copy output header
UpdateDataHeader response_data{worker_params};
- std::vector<u8> output_params(response_data.total_size);
if (behavior_info.IsElapsedFrameCountSupported()) {
- response_data.frame_count = 0x10;
- response_data.total_size += 0x10;
+ response_data.render_info = sizeof(RendererInfo);
+ response_data.total_size += sizeof(RendererInfo);
}
+
+ std::vector<u8> output_params(response_data.total_size);
std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
// Copy output memory pool entries
@@ -212,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
return Audren::ERR_INVALID_PARAMETERS;
}
+ if (behavior_info.IsElapsedFrameCountSupported()) {
+ const std::size_t renderer_info_offset{
+ sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
+ response_data.effects_size + response_data.sinks_size +
+ response_data.performance_manager_size + response_data.behavior_size};
+ RendererInfo renderer_info{};
+ renderer_info.elasped_frame_count = elapsed_frame_count;
+ std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
+ sizeof(RendererInfo));
+ }
+
return MakeResult(output_params);
}
@@ -220,14 +239,15 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
is_refresh_pending = true;
}
-std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count,
- Core::Memory::Memory& memory) {
+std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(
+ std::size_t sample_count, Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources) {
if (!IsPlaying()) {
return {};
}
if (is_refresh_pending) {
- RefreshBuffer(memory);
+ RefreshBuffer(memory, voice_resources);
}
const std::size_t max_size{samples.size() - offset};
@@ -271,7 +291,8 @@ void AudioRenderer::VoiceState::UpdateState() {
is_in_use = info.is_in_use;
}
-void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) {
+void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory,
+ const VoiceChannelHolder& voice_resources) {
const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr;
const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz;
std::vector<s16> new_samples(wave_buffer_size / sizeof(s16));
@@ -296,17 +317,77 @@ void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) {
}
switch (info.channel_count) {
- case 1:
+ case 1: {
// 1 channel is upsampled to 2 channel
samples.resize(new_samples.size() * 2);
+
for (std::size_t index = 0; index < new_samples.size(); ++index) {
- samples[index * 2] = new_samples[index];
- samples[index * 2 + 1] = new_samples[index];
+ auto sample = static_cast<float>(new_samples[index]);
+ if (voice_resources[0]->in_use) {
+ sample *= voice_resources[0]->mix_volumes[0];
+ }
+
+ samples[index * 2] = static_cast<s16>(sample * info.volume);
+ samples[index * 2 + 1] = static_cast<s16>(sample * info.volume);
}
break;
+ }
case 2: {
// 2 channel is played as is
samples = std::move(new_samples);
+ const std::size_t sample_count = (samples.size() / 2);
+ for (std::size_t index = 0; index < sample_count; ++index) {
+ const std::size_t index_l = index * 2;
+ const std::size_t index_r = index * 2 + 1;
+
+ auto sample_l = static_cast<float>(samples[index_l]);
+ auto sample_r = static_cast<float>(samples[index_r]);
+
+ if (voice_resources[0]->in_use) {
+ sample_l *= voice_resources[0]->mix_volumes[0];
+ }
+
+ if (voice_resources[1]->in_use) {
+ sample_r *= voice_resources[1]->mix_volumes[1];
+ }
+
+ samples[index_l] = static_cast<s16>(sample_l * info.volume);
+ samples[index_r] = static_cast<s16>(sample_r * info.volume);
+ }
+ break;
+ }
+ case 6: {
+ samples.resize((new_samples.size() / 6) * 2);
+ const std::size_t sample_count = samples.size() / 2;
+
+ for (std::size_t index = 0; index < sample_count; ++index) {
+ auto FL = static_cast<float>(new_samples[index * 6]);
+ auto FR = static_cast<float>(new_samples[index * 6 + 1]);
+ auto FC = static_cast<float>(new_samples[index * 6 + 2]);
+ auto BL = static_cast<float>(new_samples[index * 6 + 4]);
+ auto BR = static_cast<float>(new_samples[index * 6 + 5]);
+
+ if (voice_resources[0]->in_use) {
+ FL *= voice_resources[0]->mix_volumes[0];
+ }
+ if (voice_resources[1]->in_use) {
+ FR *= voice_resources[1]->mix_volumes[1];
+ }
+ if (voice_resources[2]->in_use) {
+ FC *= voice_resources[2]->mix_volumes[2];
+ }
+ if (voice_resources[4]->in_use) {
+ BL *= voice_resources[4]->mix_volumes[4];
+ }
+ if (voice_resources[5]->in_use) {
+ BR *= voice_resources[5]->mix_volumes[5];
+ }
+
+ samples[index * 2] =
+ static_cast<s16>((0.3694f * FL + 0.2612f * FC + 0.3694f * BL) * info.volume);
+ samples[index * 2 + 1] =
+ static_cast<s16>((0.3694f * FR + 0.2612f * FC + 0.3694f * BR) * info.volume);
+ }
break;
}
default:
@@ -352,11 +433,17 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
if (!voice.IsPlaying()) {
continue;
}
+ VoiceChannelHolder resources{};
+ for (u32 channel = 0; channel < voice.GetInfo().channel_count; channel++) {
+ const auto channel_resource_id = voice.GetInfo().voice_channel_resource_ids[channel];
+ resources[channel] = &voice_resources[channel_resource_id];
+ }
std::size_t offset{};
s64 samples_remaining{BUFFER_SIZE};
while (samples_remaining > 0) {
- const std::vector<s16> samples{voice.DequeueSamples(samples_remaining, memory)};
+ const std::vector<s16> samples{
+ voice.DequeueSamples(samples_remaining, memory, resources)};
if (samples.empty()) {
break;
@@ -372,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
}
}
audio_out->QueueBuffer(stream, tag, std::move(buffer));
+ elapsed_frame_count++;
}
void AudioRenderer::ReleaseAndQueueBuffers() {
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b42770fae..f0b691a86 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -9,6 +9,7 @@
#include <vector>
#include "audio_core/behavior_info.h"
+#include "audio_core/common.h"
#include "audio_core/stream.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@@ -116,6 +117,14 @@ struct WaveBuffer {
};
static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
+struct VoiceResourceInformation {
+ s32_le id{};
+ std::array<float_le, MAX_MIX_BUFFERS> mix_volumes{};
+ bool in_use{};
+ INSERT_PADDING_BYTES(11);
+};
+static_assert(sizeof(VoiceResourceInformation) == 0x70, "VoiceResourceInformation has wrong size");
+
struct VoiceInfo {
u32_le id;
u32_le node_id;
@@ -187,6 +196,12 @@ struct EffectOutStatus {
};
static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
+struct RendererInfo {
+ u64_le elasped_frame_count{};
+ INSERT_PADDING_WORDS(2);
+};
+static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
+
struct UpdateDataHeader {
UpdateDataHeader() {}
@@ -200,7 +215,7 @@ struct UpdateDataHeader {
mixes_size = 0x0;
sinks_size = config.sink_count * 0x20;
performance_manager_size = 0x10;
- frame_count = 0;
+ render_info = 0;
total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
effects_size + sinks_size + performance_manager_size;
}
@@ -214,8 +229,8 @@ struct UpdateDataHeader {
u32_le mixes_size{};
u32_le sinks_size{};
u32_le performance_manager_size{};
- INSERT_PADDING_WORDS(1);
- u32_le frame_count{};
+ u32_le splitter_size{};
+ u32_le render_info{};
INSERT_PADDING_WORDS(4);
u32_le total_size{};
};
@@ -244,10 +259,12 @@ private:
AudioRendererParameter worker_params;
std::shared_ptr<Kernel::WritableEvent> buffer_event;
std::vector<VoiceState> voices;
+ std::vector<VoiceResourceInformation> voice_resources;
std::vector<EffectState> effects;
std::unique_ptr<AudioOut> audio_out;
StreamPtr stream;
Core::Memory::Memory& memory;
+ std::size_t elapsed_frame_count{};
};
} // namespace AudioCore
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index 98478b66b..7bb145c53 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
}
constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
+constexpr std::size_t MAX_MIX_BUFFERS = 24;
static constexpr u32 VersionFromRevision(u32_le rev) {
// "REV7" -> 7
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ca98f8ea..dfc4805d9 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -59,15 +59,24 @@ Stream::State Stream::GetState() const {
return state;
}
-s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
+s64 Stream::GetBufferReleaseNS(const Buffer& buffer) const {
const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
- const auto us =
- std::chrono::microseconds((static_cast<u64>(num_samples) * 1000000) / sample_rate);
- return Core::Timing::usToCycles(us);
+ const auto ns =
+ std::chrono::nanoseconds((static_cast<u64>(num_samples) * 1000000000ULL) / sample_rate);
+ return ns.count();
+}
+
+s64 Stream::GetBufferReleaseNSHostTiming(const Buffer& buffer) const {
+ const std::size_t num_samples{buffer.GetSamples().size() / GetNumChannels()};
+ /// The DSP signals before playing the last sample; in HLE we emulate this by scheduling the release one sample early
+ s64 base_samples = std::max<s64>(static_cast<s64>(num_samples) - 1, 0);
+ const auto ns =
+ std::chrono::nanoseconds((static_cast<u64>(base_samples) * 1000000000ULL) / sample_rate);
+ return ns.count();
}
static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
- const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)};
+ const float volume{std::clamp(Settings::Volume() - (1.0f - game_volume), 0.0f, 1.0f)};
if (volume == 1.0f) {
return;
@@ -105,7 +114,11 @@ void Stream::PlayNextBuffer() {
sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples());
- core_timing.ScheduleEvent(GetBufferReleaseCycles(*active_buffer), release_event, {});
+ if (core_timing.IsHostTiming()) {
+ core_timing.ScheduleEvent(GetBufferReleaseNSHostTiming(*active_buffer), release_event, {});
+ } else {
+ core_timing.ScheduleEvent(GetBufferReleaseNS(*active_buffer), release_event, {});
+ }
}
void Stream::ReleaseActiveBuffer() {
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 1708a4d98..e309d60fe 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -96,7 +96,10 @@ private:
void ReleaseActiveBuffer();
/// Gets the number of nanoseconds after which the specified buffer will be released
- s64 GetBufferReleaseCycles(const Buffer& buffer) const;
+ s64 GetBufferReleaseNS(const Buffer& buffer) const;
+
+ /// Gets the number of nanoseconds after which the specified buffer will be released under host timing
+ s64 GetBufferReleaseNSHostTiming(const Buffer& buffer) const;
u32 sample_rate; ///< Sample rate of the stream
Format format; ///< Format of the stream
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 6ffc612e7..d120c8d3d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
DEPENDS
# WARNING! It was too much work to try and make a common location for this list,
# so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+ "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+ "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
"${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
"${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
@@ -96,6 +98,8 @@ add_library(common STATIC
algorithm.h
alignment.h
assert.h
+ atomic_ops.cpp
+ atomic_ops.h
detached_tasks.cpp
detached_tasks.h
bit_field.h
@@ -108,6 +112,8 @@ add_library(common STATIC
common_types.h
dynamic_library.cpp
dynamic_library.h
+ fiber.cpp
+ fiber.h
file_util.cpp
file_util.h
hash.h
@@ -123,6 +129,8 @@ add_library(common STATIC
lz4_compression.cpp
lz4_compression.h
math_util.h
+ memory_detect.cpp
+ memory_detect.h
memory_hook.cpp
memory_hook.h
microprofile.cpp
@@ -139,6 +147,8 @@ add_library(common STATIC
scm_rev.cpp
scm_rev.h
scope_exit.h
+ spin_lock.cpp
+ spin_lock.h
string_util.cpp
string_util.h
swap.h
@@ -148,6 +158,8 @@ add_library(common STATIC
thread.h
thread_queue_list.h
threadsafe_queue.h
+ time_zone.cpp
+ time_zone.h
timer.cpp
timer.h
uint128.cpp
@@ -157,6 +169,8 @@ add_library(common STATIC
vector_math.h
virtual_buffer.cpp
virtual_buffer.h
+ wall_clock.cpp
+ wall_clock.h
web_result.h
zstd_compression.cpp
zstd_compression.h
@@ -167,10 +181,15 @@ if(ARCHITECTURE_x86_64)
PRIVATE
x64/cpu_detect.cpp
x64/cpu_detect.h
+ x64/native_clock.cpp
+ x64/native_clock.h
+ x64/xbyak_abi.h
+ x64/xbyak_util.h
)
endif()
create_target_directory_groups(common)
+find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
-target_link_libraries(common PUBLIC Boost::boost fmt microprofile)
-target_link_libraries(common PRIVATE lz4_static libzstd_static)
+target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
+target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/atomic_ops.cpp b/src/common/atomic_ops.cpp
new file mode 100644
index 000000000..1098e21ff
--- /dev/null
+++ b/src/common/atomic_ops.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/atomic_ops.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+namespace Common {
+
+#if _MSC_VER
+
+/// 8-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+ auto* const destination = reinterpret_cast<volatile char*>(pointer);
+ // The intrinsic hands back the value that was stored before the call.
+ const u8 previous = _InterlockedCompareExchange8(destination, value, expected);
+ return previous == expected;
+}
+
+/// 16-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+ auto* const destination = reinterpret_cast<volatile short*>(pointer);
+ const u16 previous = _InterlockedCompareExchange16(destination, value, expected);
+ return previous == expected;
+}
+
+/// 32-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+ auto* const destination = reinterpret_cast<volatile long*>(pointer);
+ const u32 previous = _InterlockedCompareExchange(destination, value, expected);
+ return previous == expected;
+}
+
+/// 64-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+ auto* const destination = reinterpret_cast<volatile __int64*>(pointer);
+ const u64 previous = _InterlockedCompareExchange64(destination, value, expected);
+ return previous == expected;
+}
+
+/// 128-bit compare-and-swap on the two consecutive u64 words starting at `pointer`.
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+ auto* const destination = reinterpret_cast<volatile __int64*>(pointer);
+ auto* const comparand = reinterpret_cast<__int64*>(expected.data());
+ // The exchange value is passed as (high word, low word); a non-zero result means it was stored.
+ return _InterlockedCompareExchange128(destination, value[1], value[0], comparand) != 0;
+}
+
+#else
+
+/// 8-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected) {
+ const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
+ return swapped;
+}
+
+/// 16-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected) {
+ const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
+ return swapped;
+}
+
+/// 32-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected) {
+ const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
+ return swapped;
+}
+
+/// 64-bit compare-and-swap: stores `value` only if `*pointer` still holds `expected`.
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected) {
+ const bool swapped = __sync_bool_compare_and_swap(pointer, expected, value);
+ return swapped;
+}
+
+/// 128-bit compare-and-swap on the two consecutive u64 words starting at `pointer`.
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected) {
+ // Copy both 128-bit operands into native integers the builtin can work with.
+ unsigned __int128 desired_bits;
+ unsigned __int128 expected_bits;
+ std::memcpy(&desired_bits, value.data(), sizeof(u128));
+ std::memcpy(&expected_bits, expected.data(), sizeof(u128));
+ auto* const destination = reinterpret_cast<unsigned __int128*>(const_cast<u64*>(pointer));
+ return __sync_bool_compare_and_swap(destination, expected_bits, desired_bits);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
new file mode 100644
index 000000000..e6181d521
--- /dev/null
+++ b/src/common/atomic_ops.h
@@ -0,0 +1,17 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+/// Atomically stores `value` into `*pointer` if `*pointer` currently equals `expected`.
+/// Note the argument order: the new value comes before the expected one.
+/// The u128 overload operates on the 128 bits that start at `pointer`.
+/// @returns true if the exchange took place, false otherwise.
+bool AtomicCompareAndSwap(u8 volatile* pointer, u8 value, u8 expected);
+bool AtomicCompareAndSwap(u16 volatile* pointer, u16 value, u16 expected);
+bool AtomicCompareAndSwap(u32 volatile* pointer, u32 value, u32 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u64 value, u64 expected);
+bool AtomicCompareAndSwap(u64 volatile* pointer, u128 value, u128 expected);
+
+} // namespace Common
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 000000000..1c1d09ccb
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,222 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/fiber.h"
+#if defined(_WIN32) || defined(WIN32)
+#include <windows.h>
+#else
+#include <boost/context/detail/fcontext.hpp>
+#endif
+
+namespace Common {
+
+/// Stack size, in bytes, used for every fiber stack created in this file.
+constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
+
+#if defined(_WIN32) || defined(WIN32)
+
+/// Windows backend state: the native fiber handles that back a Fiber.
+struct Fiber::FiberImpl {
+ LPVOID handle = nullptr; ///< Native fiber currently backing this Fiber
+ LPVOID rewind_handle = nullptr; ///< Fiber created by Rewind(); becomes `handle` in OnRewind()
+};
+
+/// Body run by a freshly created fiber: releases the fiber that yielded here and then
+/// invokes the entry point, which is expected never to return.
+void Fiber::Start() {
+ ASSERT(previous_fiber != nullptr);
+ // Release the guard of the fiber we were switched from and drop our reference to it.
+ previous_fiber->guard.unlock();
+ previous_fiber.reset();
+ entry_point(start_parameter);
+ UNREACHABLE();
+}
+
+/// Runs on the fiber created by Rewind(): deletes the old native fiber, promotes the rewind
+/// fiber to be the current one and calls the rewind point, which is expected never to return.
+void Fiber::OnRewind() {
+ ASSERT(impl->handle != nullptr);
+ DeleteFiber(impl->handle);
+ // From now on the rewind fiber is the fiber backing this object.
+ impl->handle = impl->rewind_handle;
+ impl->rewind_handle = nullptr;
+ rewind_point(rewind_parameter);
+ UNREACHABLE();
+}
+
+/// Native fiber entry point: `fiber_parameter` is the Fiber that owns the native fiber.
+void Fiber::FiberStartFunc(void* fiber_parameter) {
+ static_cast<Fiber*>(fiber_parameter)->Start();
+}
+
+/// Native entry point of the rewind fiber: `fiber_parameter` is the Fiber being rewound.
+void Fiber::RewindStartFunc(void* fiber_parameter) {
+ static_cast<Fiber*>(fiber_parameter)->OnRewind();
+}
+
+/// Creates a fiber that will run `entry_point_func(start_parameter)` once it is first yielded to.
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+ : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter},
+ impl{std::make_unique<FiberImpl>()} {
+ // The native fiber receives `this` so FiberStartFunc can dispatch to Start().
+ impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
+}
+
+/// Private constructor used by ThreadToFiber(): only allocates the implementation state.
+Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
+
+/// Deletes the native fiber unless Exit() has already released it.
+Fiber::~Fiber() {
+ if (!released) {
+ // Make sure the Fiber is not being used
+ const bool acquired = guard.try_lock();
+ ASSERT_MSG(acquired, "Destroying a fiber that's still running");
+ if (acquired) {
+ guard.unlock();
+ }
+ DeleteFiber(impl->handle);
+ }
+}
+
+/// Converts the thread's fiber back into a plain thread; only valid on a thread fiber.
+void Fiber::Exit() {
+ ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
+ if (is_thread_fiber) {
+ ConvertFiberToThread();
+ guard.unlock();
+ // Tell the destructor that the native fiber is already gone.
+ released = true;
+ }
+}
+
+/// Registers the function, and the argument passed to it, that Rewind() restarts this fiber at.
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+ rewind_point = std::move(rewind_func);
+ rewind_parameter = start_parameter;
+}
+
+/// Restarts this fiber at its rewind point by creating a new native fiber and switching to it.
+/// A rewind point must have been registered with SetRewindPoint() beforehand.
+void Fiber::Rewind() {
+ ASSERT(rewind_point);
+ ASSERT(impl->rewind_handle == nullptr);
+ // The old native fiber is deleted by OnRewind(), which runs on the new one.
+ impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
+ SwitchToFiber(impl->rewind_handle);
+}
+
+/// Switches execution from fiber `from` to fiber `to`.
+/// `to`'s guard is locked before the switch and is released by whichever fiber runs next.
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+ ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+ ASSERT_MSG(to != nullptr, "Next fiber is null!");
+ to->guard.lock();
+ to->previous_fiber = from;
+ SwitchToFiber(to->impl->handle);
+ // Execution resumes here once some fiber yields back to `from`; release that fiber.
+ ASSERT(from->previous_fiber != nullptr);
+ from->previous_fiber->guard.unlock();
+ from->previous_fiber.reset();
+}
+
+/// Converts the calling thread into a fiber and returns the Fiber object representing it.
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+ std::shared_ptr<Fiber> thread_fiber{new Fiber()};
+ // The thread is running right now, so its fiber starts out locked.
+ thread_fiber->guard.lock();
+ thread_fiber->impl->handle = ConvertThreadToFiber(nullptr);
+ thread_fiber->is_thread_fiber = true;
+ return thread_fiber;
+}
+
+#else
+
+/// Boost.Context backend state: two inline stacks and the contexts created on them.
+struct Fiber::FiberImpl {
+ alignas(64) std::array<u8, default_stack_size> stack;
+ alignas(64) std::array<u8, default_stack_size> rewind_stack;
+ u8* stack_limit; ///< Start of the buffer used as the active stack
+ u8* rewind_stack_limit; ///< Start of the buffer the next Rewind() will use
+ boost::context::detail::fcontext_t context; ///< Context to jump to when yielding to this fiber
+ boost::context::detail::fcontext_t rewind_context; ///< Context created by Rewind()
+};
+
+/// Body run by a freshly created fiber: saves the context of the fiber that yielded here,
+/// releases that fiber and then invokes the entry point, which is expected never to return.
+void Fiber::Start(boost::context::detail::transfer_t& transfer) {
+ ASSERT(previous_fiber != nullptr);
+ // `transfer.fctx` is the suspended context of the fiber we were switched from.
+ previous_fiber->impl->context = transfer.fctx;
+ previous_fiber->guard.unlock();
+ previous_fiber.reset();
+ entry_point(start_parameter);
+ UNREACHABLE();
+}
+
+/// Runs on the rewind context created by Rewind(): makes that context and its stack the
+/// current ones and calls the rewind point, which is expected never to return.
+void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
+ ASSERT(impl->context != nullptr);
+ impl->context = impl->rewind_context;
+ impl->rewind_context = nullptr;
+ // Swap the two stack buffers so that the next Rewind() uses the one we just left.
+ u8* tmp = impl->stack_limit;
+ impl->stack_limit = impl->rewind_stack_limit;
+ impl->rewind_stack_limit = tmp;
+ rewind_point(rewind_parameter);
+ UNREACHABLE();
+}
+
+/// Context entry point: `transfer.data` is the Fiber that owns the new context.
+void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
+ static_cast<Fiber*>(transfer.data)->Start(transfer);
+}
+
+/// Entry point of the rewind context: `transfer.data` is the Fiber being rewound.
+void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
+ static_cast<Fiber*>(transfer.data)->OnRewind(transfer);
+}
+
+/// Creates a fiber that will run `entry_point_func(start_parameter)` once it is first yielded to.
+Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
+ : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
+ impl = std::make_unique<FiberImpl>();
+ impl->stack_limit = impl->stack.data();
+ impl->rewind_stack_limit = impl->rewind_stack.data();
+ // make_fcontext is given the address one past the end of the stack buffer.
+ u8* stack_base = impl->stack_limit + default_stack_size;
+ impl->context =
+ boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
+}
+
+/// Registers the function, and the argument passed to it, that Rewind() restarts this fiber at.
+void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
+ rewind_point = std::move(rewind_func);
+ rewind_parameter = start_parameter;
+}
+
+/// Private constructor used by ThreadToFiber(): only allocates the implementation state.
+Fiber::Fiber() : impl{std::make_unique<FiberImpl>()} {}
+
+/// Checks that the fiber is not running when destroyed, unless Exit() already released it.
+Fiber::~Fiber() {
+ if (!released) {
+ // Make sure the Fiber is not being used
+ const bool acquired = guard.try_lock();
+ ASSERT_MSG(acquired, "Destroying a fiber that's still running");
+ if (acquired) {
+ guard.unlock();
+ }
+ }
+}
+
+/// Releases the thread's fiber; only valid on a fiber created by ThreadToFiber().
+void Fiber::Exit() {
+ ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
+ if (is_thread_fiber) {
+ guard.unlock();
+ // Tell the destructor that there is nothing left to check.
+ released = true;
+ }
+}
+
+/// Restarts this fiber at its rewind point by creating a context on the spare stack and
+/// jumping to it. A rewind point must have been registered with SetRewindPoint() beforehand.
+void Fiber::Rewind() {
+ ASSERT(rewind_point);
+ ASSERT(impl->rewind_context == nullptr);
+ u8* stack_base = impl->rewind_stack_limit + default_stack_size;
+ // Both stack buffers have the same size, so `stack.size()` also describes the rewind stack.
+ impl->rewind_context =
+ boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
+ boost::context::detail::jump_fcontext(impl->rewind_context, this);
+}
+
+/// Switches execution from fiber `from` to fiber `to`.
+/// `to`'s guard is locked before the switch and is released by whichever fiber runs next.
+void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
+ ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
+ ASSERT_MSG(to != nullptr, "Next fiber is null!");
+ to->guard.lock();
+ to->previous_fiber = from;
+ auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
+ // Execution resumes here once some fiber yields back to `from`: store that fiber's
+ // suspended context and release it.
+ ASSERT(from->previous_fiber != nullptr);
+ from->previous_fiber->impl->context = transfer.fctx;
+ from->previous_fiber->guard.unlock();
+ from->previous_fiber.reset();
+}
+
+/// Returns a Fiber object that represents the calling thread.
+std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
+ std::shared_ptr<Fiber> thread_fiber{new Fiber()};
+ // The thread is running right now, so its fiber starts out locked.
+ thread_fiber->guard.lock();
+ thread_fiber->is_thread_fiber = true;
+ return thread_fiber;
+}
+
+#endif
+} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 000000000..dafc1100e
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,92 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "common/common_types.h"
+#include "common/spin_lock.h"
+
+#if !defined(_WIN32) && !defined(WIN32)
+namespace boost::context::detail {
+struct transfer_t;
+}
+#endif
+
+namespace Common {
+
+/**
+ * Fiber class
+ * A fiber is a userspace thread with its own context. Fibers can be used to
+ * implement coroutines, emulated threading systems and certain asynchronous
+ * patterns.
+ *
+ * This class implements fibers at a low level, thus allowing greater freedom
+ * to implement such patterns. This fiber class is 'threadsafe': only one fiber
+ * can be running at a time, and a thread trying to yield to a running fiber
+ * is blocked until that fiber yields. WARNING: exchanging two running fibers between
+ * threads will cause a deadlock. In order to prevent a deadlock, each thread should
+ * have an intermediary fiber: you switch to the intermediary fiber of the current
+ * thread and then from it switch to the expected fiber. This way you can exchange
+ * 2 fibers within 2 different threads.
+ *
+ * A Fiber is neither copyable nor movable: the native fiber is started with a pointer
+ * to its owning Fiber object, so the object must stay at a fixed address.
+ */
+class Fiber {
+public:
+ /// Creates a fiber that runs `entry_point_func(start_parameter)` when first yielded to
+ Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
+ ~Fiber();
+
+ Fiber(const Fiber&) = delete;
+ Fiber& operator=(const Fiber&) = delete;
+
+ // Explicitly deleted: the defaulted versions were already implicitly deleted because
+ // SpinLock cannot be moved, and a moved Fiber would leave the native fiber pointing
+ // at a stale object.
+ Fiber(Fiber&&) = delete;
+ Fiber& operator=(Fiber&&) = delete;
+
+ /// Yields control from Fiber 'from' to Fiber 'to'
+ /// Fiber 'from' must be the currently running fiber.
+ static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
+
+ /// Creates the Fiber object that represents the calling thread
+ static std::shared_ptr<Fiber> ThreadToFiber();
+
+ /// Sets the function, and its argument, that Rewind() restarts this fiber at
+ void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
+
+ /// Restarts this fiber at the rewind point; a rewind point must have been set
+ void Rewind();
+
+ /// Only call from main thread's fiber
+ void Exit();
+
+ /// Changes the start parameter of the fiber. Has no effect if the fiber already started
+ void SetStartParameter(void* new_parameter) {
+ start_parameter = new_parameter;
+ }
+
+private:
+ /// Used by ThreadToFiber(); creates a Fiber without an entry point
+ Fiber();
+
+#if defined(_WIN32) || defined(WIN32)
+ void OnRewind();
+ void Start();
+ static void FiberStartFunc(void* fiber_parameter);
+ static void RewindStartFunc(void* fiber_parameter);
+#else
+ void OnRewind(boost::context::detail::transfer_t& transfer);
+ void Start(boost::context::detail::transfer_t& transfer);
+ static void FiberStartFunc(boost::context::detail::transfer_t transfer);
+ static void RewindStartFunc(boost::context::detail::transfer_t transfer);
+#endif
+
+ struct FiberImpl;
+
+ SpinLock guard{}; ///< Held while the fiber is running
+ std::function<void(void*)> entry_point;
+ std::function<void(void*)> rewind_point;
+ void* rewind_parameter{};
+ void* start_parameter{};
+ std::shared_ptr<Fiber> previous_fiber; ///< Fiber that yielded to this one, until it is released
+ std::unique_ptr<FiberImpl> impl;
+ bool is_thread_fiber{};
+ bool released{};
+};
+
+} // namespace Common
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp
new file mode 100644
index 000000000..3fdc309a2
--- /dev/null
+++ b/src/common/memory_detect.cpp
@@ -0,0 +1,60 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _WIN32
+// clang-format off
+#include <windows.h>
+#include <sysinfoapi.h>
+// clang-format on
+#else
+#include <sys/types.h>
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#else
+#include <sys/sysinfo.h>
+#endif
+#endif
+
+#include "common/memory_detect.h"
+
+namespace Common {
+
+// Detects the RAM and Swapfile sizes, in bytes.
+// A size that the host OS fails to report is left at zero.
+static MemoryInfo Detect() {
+ MemoryInfo mem_info{};
+
+#ifdef _WIN32
+ MEMORYSTATUSEX memorystatus;
+ memorystatus.dwLength = sizeof(memorystatus);
+ if (GlobalMemoryStatusEx(&memorystatus)) {
+ mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
+ mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
+ }
+#elif defined(__APPLE__)
+ u64 ramsize{};
+ struct xsw_usage vmusage {};
+ std::size_t sizeof_ramsize = sizeof(ramsize);
+ std::size_t sizeof_vmusage = sizeof(vmusage);
+ // hw and vm are defined in sysctl.h
+ // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
+ // sysctlbyname(const char *, void *, size_t *, void *, size_t);
+ if (sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, nullptr, 0) == 0) {
+ mem_info.TotalPhysicalMemory = ramsize;
+ }
+ if (sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, nullptr, 0) == 0) {
+ mem_info.TotalSwapMemory = vmusage.xsu_total;
+ }
+#else
+ struct sysinfo meminfo;
+ if (sysinfo(&meminfo) == 0) {
+ // sysinfo reports its sizes as multiples of mem_unit bytes, so scale them.
+ const u64 unit = meminfo.mem_unit;
+ mem_info.TotalPhysicalMemory = static_cast<u64>(meminfo.totalram) * unit;
+ mem_info.TotalSwapMemory = static_cast<u64>(meminfo.totalswap) * unit;
+ }
+#endif
+
+ return mem_info;
+}
+
+const MemoryInfo& GetMemInfo() {
+ // Detected on the first call and reused for every later one.
+ static const MemoryInfo cached_info = Detect();
+ return cached_info;
+}
+
+} // namespace Common \ No newline at end of file
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h
new file mode 100644
index 000000000..a73c0f3f4
--- /dev/null
+++ b/src/common/memory_detect.h
@@ -0,0 +1,22 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+/// Memory sizes of the host system, in bytes.
+struct MemoryInfo {
+ u64 TotalPhysicalMemory{}; ///< Total physical memory
+ u64 TotalSwapMemory{}; ///< Total swap memory
+};
+
+/**
+ * Gets the memory info of the host system
+ * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
+ */
+const MemoryInfo& GetMemInfo();
+
+} // namespace Common \ No newline at end of file
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 000000000..c1524220f
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,54 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/spin_lock.h"
+
+#if _MSC_VER
+#include <intrin.h>
+#if _M_AMD64
+#define __x86_64__ 1
+#endif
+#if _M_ARM64
+#define __aarch64__ 1
+#endif
+#else
+#if __x86_64__
+#include <xmmintrin.h>
+#endif
+#endif
+
+namespace {
+
+/// Issues the pause/yield instruction of the target CPU inside a spin loop.
+/// Compiles to nothing on architectures other than x86_64 and AArch64.
+void ThreadPause() {
+#if __x86_64__
+ _mm_pause();
+#elif __aarch64__ && _MSC_VER
+ __yield();
+#elif __aarch64__
+ asm("yield");
+#endif
+}
+
+} // Anonymous namespace
+
+namespace Common {
+
+/// Spins until the flag could be set by this caller, pausing the CPU between attempts.
+void SpinLock::lock() {
+ for (;;) {
+ if (!lck.test_and_set(std::memory_order_acquire)) {
+ return;
+ }
+ ThreadPause();
+ }
+}
+
+/// Releases the lock by clearing the flag with release ordering.
+void SpinLock::unlock() {
+ lck.clear(std::memory_order_release);
+}
+
+/// Makes a single attempt to take the lock; returns true if it was acquired.
+bool SpinLock::try_lock() {
+ // test_and_set returns the previous state: false means the lock was free.
+ return !lck.test_and_set(std::memory_order_acquire);
+}
+
+} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 000000000..1df5528c4
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,26 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+
+namespace Common {
+
+/**
+ * SpinLock class
+ * a lock similar to mutex that forces a thread to spin wait instead of calling the
+ * supervisor. Should be used on short sequences of code.
+ */
+class SpinLock {
+public:
+ /// Spins until the lock has been acquired
+ void lock();
+ /// Releases the lock
+ void unlock();
+ /// Tries to acquire the lock once without spinning; returns true on success
+ bool try_lock();
+
+private:
+ std::atomic_flag lck = ATOMIC_FLAG_INIT;
+};
+
+} // namespace Common
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 200c6489a..16d42facd 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
+ fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index 0cd2d10bf..8e5935e6a 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -25,6 +25,52 @@
namespace Common {
+#ifdef _WIN32
+
+/// Applies `new_priority` to the calling thread using the Win32 thread priority levels.
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+ // Translate the portable level into the matching Win32 constant; unknown values map to normal.
+ const auto to_windows_priority = [](ThreadPriority priority) -> int {
+ switch (priority) {
+ case ThreadPriority::Low:
+ return THREAD_PRIORITY_BELOW_NORMAL;
+ case ThreadPriority::High:
+ return THREAD_PRIORITY_ABOVE_NORMAL;
+ case ThreadPriority::VeryHigh:
+ return THREAD_PRIORITY_HIGHEST;
+ case ThreadPriority::Normal:
+ default:
+ return THREAD_PRIORITY_NORMAL;
+ }
+ };
+ SetThreadPriority(GetCurrentThread(), to_windows_priority(new_priority));
+}
+
+#else
+
+/// Applies `new_priority` to the calling thread by scaling it into the SCHED_OTHER priority range.
+void SetCurrentThreadPriority(ThreadPriority new_priority) {
+ const s32 highest = sched_get_priority_max(SCHED_OTHER);
+ const s32 lowest = sched_get_priority_min(SCHED_OTHER);
+ // Levels run from 1 (Low) to 4 (VeryHigh); each one covers a quarter of the range.
+ const u32 level = static_cast<u32>(new_priority) + 1;
+
+ struct sched_param params;
+ params.sched_priority = highest > lowest ? lowest + ((highest - lowest) * level) / 4
+ : lowest - ((lowest - highest) * level) / 4;
+
+ pthread_setschedparam(pthread_self(), SCHED_OTHER, &params);
+}
+
+#endif
+
#ifdef _MSC_VER
// Sets the debugger-visible name of the current thread.
@@ -70,6 +116,12 @@ void SetCurrentThreadName(const char* name) {
}
#endif
+#if defined(_WIN32)
+/// Stub that ignores `name`; per the comment below it is the variant intended for MinGW builds.
+void SetCurrentThreadName(const char* name) {
+ // Do Nothing on MingW
+}
+#endif
+
#endif
} // namespace Common
diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc071685..52b359413 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
#include <cstddef>
#include <mutex>
#include <thread>
+#include "common/common_types.h"
namespace Common {
@@ -28,8 +29,7 @@ public:
is_set = false;
}
- template <class Duration>
- bool WaitFor(const std::chrono::duration<Duration>& time) {
+ bool WaitFor(const std::chrono::nanoseconds& time) {
std::unique_lock lk{mutex};
if (!condvar.wait_for(lk, time, [this] { return is_set; }))
return false;
@@ -86,6 +86,15 @@ private:
std::size_t generation = 0; // Incremented once each time the barrier is used
};
+/// Portable thread priority levels, ordered from lowest to highest.
+enum class ThreadPriority : u32 {
+ Low = 0,
+ Normal = 1,
+ High = 2,
+ VeryHigh = 3,
+};
+
+void SetCurrentThreadPriority(ThreadPriority new_priority);
+
void SetCurrentThreadName(const char* name);
} // namespace Common
diff --git a/src/common/time_zone.cpp b/src/common/time_zone.cpp
new file mode 100644
index 000000000..ce239eb63
--- /dev/null
+++ b/src/common/time_zone.cpp
@@ -0,0 +1,49 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <iomanip>
+#include <sstream>
+
+#include "common/logging/log.h"
+#include "common/time_zone.h"
+
+namespace Common::TimeZone {
+
+/// Returns the name of the default time zone, which is always "GMT".
+std::string GetDefaultTimeZone() {
+ return "GMT";
+}
+
+/// Returns the host's current UTC offset as formatted by "%z", e.g. "-0400".
+/// Falls back to "+0000" when the local time cannot be determined.
+static std::string GetOsTimeZoneOffset() {
+ const std::time_t t{std::time(nullptr)};
+ // std::localtime returns a null pointer on failure, so check before dereferencing.
+ const std::tm* const local_time{std::localtime(&t)};
+ if (local_time == nullptr) {
+ return "+0000";
+ }
+ // Copy the result right away: std::localtime hands out a shared internal buffer.
+ const std::tm tm{*local_time};
+
+ std::stringstream ss;
+ ss << std::put_time(&tm, "%z"); // Get the current timezone offset, e.g. "-0400", as a string
+
+ return ss.str();
+}
+
+/// Parses an offset string such as "-0400" into an integer; logs and returns 0 if parsing fails.
+static int ConvertOsTimeZoneOffsetToInt(const std::string& timezone) {
+ int parsed_offset = 0;
+ try {
+ parsed_offset = std::stoi(timezone);
+ } catch (const std::invalid_argument&) {
+ LOG_CRITICAL(Common, "invalid_argument with {}!", timezone);
+ } catch (const std::out_of_range&) {
+ LOG_CRITICAL(Common, "out_of_range with {}!", timezone);
+ }
+ return parsed_offset;
+}
+
+/// Converts the host's "hhmm"-style UTC offset into a number of seconds.
+std::chrono::seconds GetCurrentOffsetSeconds() {
+ const int hhmm{ConvertOsTimeZoneOffsetToInt(GetOsTimeZoneOffset())};
+
+ // The hundreds carry the hours and the last two digits the minutes; both keep the sign.
+ const int hour_seconds{(hhmm / 100) * 60 * 60};
+ const int minute_seconds{(hhmm % 100) * 60};
+
+ return std::chrono::seconds{hour_seconds + minute_seconds};
+}
+
+} // namespace Common::TimeZone
diff --git a/src/common/time_zone.h b/src/common/time_zone.h
new file mode 100644
index 000000000..945daa09c
--- /dev/null
+++ b/src/common/time_zone.h
@@ -0,0 +1,18 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <string>
+
+namespace Common::TimeZone {
+
+/// Gets the default timezone, i.e. "GMT"
+std::string GetDefaultTimeZone();
+
+/// Gets the offset of the current timezone (from the default), in seconds
+std::chrono::seconds GetCurrentOffsetSeconds();
+
+} // namespace Common::TimeZone
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730..16bf7c828 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,38 @@
#include <intrin.h>
#pragma intrinsic(_umul128)
+#pragma intrinsic(_udiv128)
#endif
#include <cstring>
#include "common/uint128.h"
namespace Common {
+#ifdef _MSC_VER
+
+/// Computes a * b / d, forming the full 128-bit product before dividing.
+/// NOTE(review): presumably the quotient has to fit in 64 bits — confirm against callers.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+ u128 r{};
+ // r[0] receives the low half of the product, r[1] the high half.
+ r[0] = _umul128(a, b, &r[1]);
+ u64 remainder;
+#if _MSC_VER < 1923
+ // NOTE(review): `udiv128` (no leading underscore) is not declared in the visible code — confirm.
+ return udiv128(r[1], r[0], d, &remainder);
+#else
+ return _udiv128(r[1], r[0], d, &remainder);
+#endif
+}
+
+#else
+
+/// Computes a * b / d without losing the upper bits of the intermediate product.
+/// The result is truncated to 64 bits; `d` must not be zero.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
+ // Form the full 128-bit product first. Splitting the operands into quotient and
+ // remainder parts overflows 64 bits once d exceeds 2^32.
+ const unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
+ return static_cast<u64>(product / d);
+}
+
+#endif
+
u128 Multiply64Into128(u64 a, u64 b) {
u128 result;
#ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cb..503cd2d0c 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
namespace Common {
+// This function multiplies 2 u64 values and divides it by a u64 value.
+u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
+
// This function multiplies 2 u64 values and produces a u128 value;
u128 Multiply64Into128(u64 a, u64 b);
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 000000000..3afbdb898
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,91 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/uint128.h"
+#include "common/wall_clock.h"
+
+#ifdef ARCHITECTURE_x86_64
+#include "common/x64/cpu_detect.h"
+#include "common/x64/native_clock.h"
+#endif
+
+namespace Common {
+
+using base_timer = std::chrono::steady_clock;
+using base_time_point = std::chrono::time_point<base_timer>;
+
+/// Wall clock built on std::chrono::steady_clock; time is measured from construction.
+class StandardWallClock : public WallClock {
+public:
+ StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
+ : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false),
+ start_time{base_timer::now()} {}
+
+ std::chrono::nanoseconds GetTimeNS() override {
+ return ElapsedAs<std::chrono::nanoseconds>();
+ }
+
+ std::chrono::microseconds GetTimeUS() override {
+ return ElapsedAs<std::chrono::microseconds>();
+ }
+
+ std::chrono::milliseconds GetTimeMS() override {
+ return ElapsedAs<std::chrono::milliseconds>();
+ }
+
+ u64 GetClockCycles() override {
+ return ElapsedTicksAt(emulated_clock_frequency);
+ }
+
+ u64 GetCPUCycles() override {
+ return ElapsedTicksAt(emulated_cpu_frequency);
+ }
+
+ void Pause([[maybe_unused]] bool is_paused) override {
+ // Do nothing in this clock type.
+ }
+
+private:
+ /// Time elapsed since construction, converted to the requested duration type
+ template <typename Duration>
+ Duration ElapsedAs() const {
+ return std::chrono::duration_cast<Duration>(base_timer::now() - start_time);
+ }
+
+ /// Elapsed nanoseconds scaled to ticks of a clock running at `frequency` Hz
+ u64 ElapsedTicksAt(u64 frequency) {
+ const std::chrono::nanoseconds time_now = GetTimeNS();
+ const u128 scaled = Common::Multiply64Into128(time_now.count(), frequency);
+ return Common::Divide128On32(scaled, 1000000000).first;
+ }
+
+ base_time_point start_time;
+};
+
+#ifdef ARCHITECTURE_x86_64
+
+/// Picks the TSC-backed clock when the CPU has an invariant TSC whose frequency could be
+/// estimated, and the std::chrono based clock otherwise.
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+ u32 emulated_clock_frequency) {
+ const auto& caps = GetCPUCaps();
+ // The frequency is only estimated when the TSC is reported as invariant.
+ const u64 rtsc_frequency = caps.invariant_tsc ? EstimateRDTSCFrequency() : 0;
+ if (rtsc_frequency != 0) {
+ return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
+ rtsc_frequency);
+ }
+ return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
+}
+
+#else
+
+/// Variant for builds without ARCHITECTURE_x86_64: always returns the std::chrono based clock.
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+ u32 emulated_clock_frequency) {
+ return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
+}
+
+#endif
+
+} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 000000000..367d72134
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,53 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <chrono>
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+/// Abstract interface of a clock that reports elapsed wall time in several units.
+class WallClock {
+public:
+ /// Clocks are owned and destroyed through pointers to this base class, so the
+ /// destructor has to be virtual.
+ virtual ~WallClock() = default;
+
+ /// Returns current wall time in nanoseconds
+ virtual std::chrono::nanoseconds GetTimeNS() = 0;
+
+ /// Returns current wall time in microseconds
+ virtual std::chrono::microseconds GetTimeUS() = 0;
+
+ /// Returns current wall time in milliseconds
+ virtual std::chrono::milliseconds GetTimeMS() = 0;
+
+ /// Returns current wall time in emulated clock cycles
+ virtual u64 GetClockCycles() = 0;
+
+ /// Returns current wall time in emulated cpu cycles
+ virtual u64 GetCPUCycles() = 0;
+
+ /// Notifies the clock that emulation has been paused or resumed
+ virtual void Pause(bool is_paused) = 0;
+
+ /// Tells if the wall clock uses the host CPU's hardware clock
+ bool IsNative() const {
+ return is_native;
+ }
+
+protected:
+ WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
+ : emulated_cpu_frequency{emulated_cpu_frequency},
+ emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
+
+ u64 emulated_cpu_frequency;
+ u64 emulated_clock_frequency;
+
+private:
+ bool is_native;
+};
+
+/// Creates the most suitable WallClock implementation for the host, configured with the
+/// given emulated CPU and clock frequencies.
+std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
+ u32 emulated_clock_frequency);
+
+} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
+ if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
+ caps.manufacturer = Manufacturer::Intel;
+ else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
+ caps.manufacturer = Manufacturer::AMD;
+ else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
+ caps.manufacturer = Manufacturer::Hygon;
+ else
+ caps.manufacturer = Manufacturer::Unknown;
+
+ u32 family = {};
+ u32 model = {};
__cpuid(cpu_id, 0x80000000);
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
+ family = (cpu_id[0] >> 8) & 0xf;
+ model = (cpu_id[0] >> 4) & 0xf;
+ if (family == 0xf) {
+ family += (cpu_id[0] >> 20) & 0xff;
+ }
+ if (family >= 6) {
+ model += ((cpu_id[0] >> 16) & 0xf) << 4;
+ }
if ((cpu_id[3] >> 25) & 1)
caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
+ // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
+ if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
+ (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
+ caps.avx512 = caps.avx2;
+ }
}
}
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
caps.fma4 = true;
}
+ if (max_ex_fn >= 0x80000007) {
+ __cpuid(cpu_id, 0x80000007);
+ if (cpu_id[3] & (1 << 8)) {
+ caps.invariant_tsc = true;
+ }
+ }
+
+ if (max_std_fn >= 0x16) {
+ __cpuid(cpu_id, 0x16);
+ caps.base_frequency = cpu_id[0];
+ caps.max_frequency = cpu_id[1];
+ caps.bus_frequency = cpu_id[2];
+ }
+
return caps;
}
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
namespace Common {
+/// CPU vendor stored in CPUCaps::manufacturer.
+enum class Manufacturer : u32 {
+ Intel = 0,
+ AMD = 1,
+ Hygon = 2,
+ Unknown = 3,
+};
+
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
+ Manufacturer manufacturer;
char cpu_string[0x21];
char brand_string[0x41];
bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
bool lzcnt;
bool avx;
bool avx2;
+ bool avx512;
bool bmi1;
bool bmi2;
bool fma;
bool fma4;
bool aes;
+ bool invariant_tsc;
+ u32 base_frequency;
+ u32 max_frequency;
+ u32 bus_frequency;
};
/**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..424b39b1f
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,103 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <mutex>
+#include <thread>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#include "common/uint128.h"
+#include "common/x64/native_clock.h"
+
+namespace Common {
+
+/**
+ * Estimates the frequency of the host TSC by comparing RDTSC against a monotonic clock.
+ *
+ * Blocks the calling thread for roughly 3 seconds while the sample is taken.
+ *
+ * @returns Estimated number of TSC ticks per second.
+ */
+u64 EstimateRDTSCFrequency() {
+    // steady_clock is guaranteed to be monotonic. high_resolution_clock may be an alias of
+    // system_clock, which can be adjusted mid-measurement and skew (or stall) the estimate.
+    using Clock = std::chrono::steady_clock;
+    constexpr auto sample_window = std::chrono::milliseconds{3000};
+    constexpr auto poll_interval = std::chrono::milliseconds{10};
+
+    // Fence, then take the starting TSC sample and the matching timestamp.
+    _mm_mfence();
+    const u64 tsc_start = __rdtsc();
+    const auto start_time = Clock::now();
+
+    // Sleep in short slices until the sample window has elapsed.
+    while (Clock::now() - start_time < sample_window) {
+        std::this_thread::sleep_for(poll_interval);
+    }
+
+    const auto end_time = Clock::now();
+    _mm_mfence();
+    const u64 tsc_end = __rdtsc();
+
+    // Scale ticks-per-elapsed-nanoseconds up to ticks-per-second.
+    // NOTE(review): MultiplyAndDivide64 presumably computes a * b / d without intermediate
+    // overflow - confirm in common/uint128.h.
+    const u64 elapsed_ns =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count();
+    const u64 tsc_ticks = tsc_end - tsc_start;
+    return MultiplyAndDivide64(tsc_ticks, 1000000000ULL, elapsed_ns);
+}
+
+namespace X64 {
+/**
+ * Constructs a wall clock driven by the host TSC.
+ *
+ * @param emulated_cpu_frequency Forwarded to the WallClock base.
+ * @param emulated_clock_frequency Forwarded to the WallClock base.
+ * @param rtsc_frequency Host TSC frequency used as the divisor in every tick conversion.
+ */
+NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
+ u64 rtsc_frequency)
+ : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
+ rtsc_frequency} {
+ // NOTE(review): the literal `true` passed to WallClock presumably marks this clock as
+ // native - confirm against common/wall_clock.h.
+ // Record the TSC value that the first GetRTSC() call will be measured against.
+ _mm_mfence();
+ last_measure = __rdtsc();
+ accumulated_ticks = 0U;
+}
+
+/**
+ * Samples the host TSC, adds the ticks elapsed since the previous sample to the running
+ * total, and returns that total with the low bits masked off.
+ * The whole update runs while holding rtsc_serialize.
+ */
+u64 NativeClock::GetRTSC() {
+ std::scoped_lock scope{rtsc_serialize};
+ _mm_mfence();
+ const u64 current_measure = __rdtsc();
+ u64 diff = current_measure - last_measure;
+ // Branchless clamp: if the difference is negative when viewed as s64 (the new sample is
+ // behind last_measure), the shifted sign bit produces an all-ones value whose complement
+ // zeroes diff; otherwise diff is left untouched.
+ diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
+ // Only ever move the reference sample forwards.
+ if (current_measure > last_measure) {
+ last_measure = current_measure;
+ }
+ accumulated_ticks += diff;
+ /// The clock cannot be more precise than the guest timer, remove the lower bits
+ return accumulated_ticks & inaccuracy_mask;
+}
+
+/**
+ * Notifies the clock of a pause state change.
+ * When resuming (is_paused == false) the reference TSC sample is reset to "now", so the
+ * ticks that elapsed while paused are not added to accumulated_ticks by the next GetRTSC().
+ * Entering the paused state does nothing here.
+ */
+void NativeClock::Pause(bool is_paused) {
+ if (!is_paused) {
+ // NOTE(review): last_measure is written here without taking rtsc_serialize, while
+ // GetRTSC() reads and writes it under that lock - confirm callers never overlap.
+ _mm_mfence();
+ last_measure = __rdtsc();
+ }
+}
+
+/// Converts the accumulated TSC tick count into nanoseconds.
+std::chrono::nanoseconds NativeClock::GetTimeNS() {
+    const u64 ticks = GetRTSC();
+    const u64 nanos = MultiplyAndDivide64(ticks, 1000000000, rtsc_frequency);
+    return std::chrono::nanoseconds{nanos};
+}
+
+/// Converts the accumulated TSC tick count into microseconds.
+std::chrono::microseconds NativeClock::GetTimeUS() {
+    const u64 ticks = GetRTSC();
+    const u64 micros = MultiplyAndDivide64(ticks, 1000000, rtsc_frequency);
+    return std::chrono::microseconds{micros};
+}
+
+/// Converts the accumulated TSC tick count into milliseconds.
+std::chrono::milliseconds NativeClock::GetTimeMS() {
+    const u64 ticks = GetRTSC();
+    const u64 millis = MultiplyAndDivide64(ticks, 1000, rtsc_frequency);
+    return std::chrono::milliseconds{millis};
+}
+
+/// Rescales the accumulated TSC tick count to the emulated clock frequency.
+u64 NativeClock::GetClockCycles() {
+    const u64 host_ticks = GetRTSC();
+    const u64 guest_clock_ticks =
+        MultiplyAndDivide64(host_ticks, emulated_clock_frequency, rtsc_frequency);
+    return guest_clock_ticks;
+}
+
+/// Rescales the accumulated TSC tick count to the emulated CPU frequency.
+u64 NativeClock::GetCPUCycles() {
+    const u64 host_ticks = GetRTSC();
+    const u64 guest_cpu_ticks =
+        MultiplyAndDivide64(host_ticks, emulated_cpu_frequency, rtsc_frequency);
+    return guest_cpu_ticks;
+}
+
+} // namespace X64
+
+} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..891a3bbfd
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <optional>
+
+#include "common/spin_lock.h"
+#include "common/wall_clock.h"
+
+namespace Common {
+
+namespace X64 {
+/// WallClock implementation that derives time from the host timestamp counter (RDTSC).
+class NativeClock : public WallClock {
+public:
+ /// @param rtsc_frequency Host TSC frequency; used as the divisor when converting ticks.
+ NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
+
+ std::chrono::nanoseconds GetTimeNS() override;
+
+ std::chrono::microseconds GetTimeUS() override;
+
+ std::chrono::milliseconds GetTimeMS() override;
+
+ u64 GetClockCycles() override;
+
+ u64 GetCPUCycles() override;
+
+ void Pause(bool is_paused) override;
+
+private:
+ /// Samples the TSC under rtsc_serialize and returns the masked accumulated tick count.
+ u64 GetRTSC();
+
+ /// Mask used to reduce the native clock's accuracy, as some apps rely on
+ /// undefined behavior where the level of accuracy in the clock shouldn't
+ /// be higher. Clears the 10 least significant bits of the tick count.
+ static constexpr u64 inaccuracy_mask = ~(0x400 - 1);
+
+ SpinLock rtsc_serialize{}; // held for the duration of GetRTSC()
+ u64 last_measure{}; // TSC value of the most recent forward sample
+ u64 accumulated_ticks{}; // running sum of non-negative TSC deltas
+ u64 rtsc_frequency; // host TSC frequency supplied at construction
+};
+} // namespace X64
+
+u64 EstimateRDTSCFrequency();
+
+} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..a5f5d4fc1
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,229 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <initializer_list>
+#include <xbyak.h>
+#include "common/assert.h"
+
+namespace Common::X64 {
+
+/**
+ * Maps a general-purpose or XMM register to its bit position in a 32-entry register set.
+ * GPRs occupy indices 0-15 and XMM registers occupy indices 16-31.
+ *
+ * @param reg Register to translate; must be a GPR or XMM register with an index below 16.
+ * @returns Bit index of the register within the set.
+ */
+inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
+    using Kind = Xbyak::Reg::Kind;
+    ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
+               "RegSet only supports GPRs and XMM registers.");
+    // The bound applies to both register files, so the message must not single out XMMs.
+    ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports register indices 0-15.");
+    return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
+}
+
+/// Translates a register-set index in the range 0-15 into the matching 64-bit GPR.
+inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
+    ASSERT(reg_index < 16);
+    const int gpr_number = static_cast<int>(reg_index);
+    return Xbyak::Reg64(gpr_number);
+}
+
+/// Translates a register-set index in the range 16-31 into the matching XMM register.
+inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
+    ASSERT(reg_index >= 16 && reg_index < 32);
+    // XMM registers are stored after the 16 GPR slots.
+    const std::size_t xmm_number = reg_index - 16;
+    return Xbyak::Xmm(static_cast<int>(xmm_number));
+}
+
+/// Translates any register-set index into a register: 0-15 yield GPRs, the rest XMMs.
+inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
+    if (reg_index >= 16) {
+        return IndexToXmm(reg_index);
+    }
+    return IndexToReg64(reg_index);
+}
+
+/// Builds a register set with one bit raised for every register in the given list.
+inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
+    std::bitset<32> reg_set;
+    for (const Xbyak::Reg& entry : regs) {
+        reg_set.set(RegToIndex(entry));
+    }
+    return reg_set;
+}
+
+// Masks selecting each half of a register set: bits 0-15 hold the GPRs and bits 16-31 hold
+// the XMM registers, matching the layout produced by RegToIndex.
+const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
+const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
+
+#ifdef _WIN32
+
+// Microsoft x64 ABI
+// Return register and the first four integer parameter registers.
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
+
+// Registers this module treats as caller-saved.
+// NOTE(review): rax (ABI_RETURN) is not part of this set - presumably intentional because
+// it carries the return value; confirm with callers.
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+});
+
+// Registers this module treats as callee-saved (includes xmm6-xmm15 on this ABI).
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rsi,
+ Xbyak::util::rdi,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+ // XMMs
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+// Bytes added to every frame computed by ABI_CalculateFrameSize on this ABI.
+constexpr size_t ABI_SHADOW_SPACE = 0x20;
+
+#else
+
+// System V x86-64 ABI
+// Return register and the first four integer parameter registers.
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
+
+// Registers this module treats as caller-saved; every XMM register is included on this ABI.
+// NOTE(review): rax (ABI_RETURN) is not part of this set - presumably intentional because
+// it carries the return value; confirm with callers.
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rcx,
+ Xbyak::util::rdx,
+ Xbyak::util::rdi,
+ Xbyak::util::rsi,
+ Xbyak::util::r8,
+ Xbyak::util::r9,
+ Xbyak::util::r10,
+ Xbyak::util::r11,
+ // XMMs
+ Xbyak::util::xmm0,
+ Xbyak::util::xmm1,
+ Xbyak::util::xmm2,
+ Xbyak::util::xmm3,
+ Xbyak::util::xmm4,
+ Xbyak::util::xmm5,
+ Xbyak::util::xmm6,
+ Xbyak::util::xmm7,
+ Xbyak::util::xmm8,
+ Xbyak::util::xmm9,
+ Xbyak::util::xmm10,
+ Xbyak::util::xmm11,
+ Xbyak::util::xmm12,
+ Xbyak::util::xmm13,
+ Xbyak::util::xmm14,
+ Xbyak::util::xmm15,
+});
+
+// Registers this module treats as callee-saved; no XMM registers on this ABI.
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+ // GPRs
+ Xbyak::util::rbx,
+ Xbyak::util::rbp,
+ Xbyak::util::r12,
+ Xbyak::util::r13,
+ Xbyak::util::r14,
+ Xbyak::util::r15,
+});
+
+// No extra bytes are added to frames computed by ABI_CalculateFrameSize on this ABI.
+constexpr size_t ABI_SHADOW_SPACE = 0;
+
+#endif
+
+/// Stack frame layout computed by ABI_CalculateFrameSize.
+struct ABIFrameInfo {
+ s32 subtraction; // bytes subtracted from rsp after the GPR pushes
+ s32 xmm_offset; // offset from the adjusted rsp at which the first XMM register is stored
+};
+
+/**
+ * Computes how far rsp must be lowered, after the GPRs in `regs` have been pushed, to hold
+ * the XMM registers in `regs`, the requested frame and ABI_SHADOW_SPACE.
+ *
+ * @param regs Register set to be saved (bits 0-15 GPRs, bits 16-31 XMMs).
+ * @param rsp_alignment Alignment state of rsp on entry; only its low 4 bits influence the
+ * result. NOTE(review): presumably rsp modulo 16 at the call site - confirm.
+ * @param needed_frame_size Extra bytes requested by the caller.
+ * @returns The rsp subtraction and the offset of the XMM save area from the new rsp.
+ */
+inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+ size_t needed_frame_size) {
+ // Account for the 8 bytes each GPR push consumes.
+ const auto count = (regs & ABI_ALL_GPRS).count();
+ rsp_alignment -= count * 8;
+ size_t subtraction = 0;
+ const auto xmm_count = (regs & ABI_ALL_XMMS).count();
+ if (xmm_count) {
+ // If we have any XMMs to save, we must align the stack here.
+ subtraction = rsp_alignment & 0xF;
+ }
+ // Reserve one 16-byte slot per XMM register.
+ subtraction += 0x10 * xmm_count;
+ // Everything added past this point lies below the XMM save area.
+ size_t xmm_base_subtraction = subtraction;
+ subtraction += needed_frame_size;
+ subtraction += ABI_SHADOW_SPACE;
+ // Final alignment.
+ rsp_alignment -= subtraction;
+ subtraction += rsp_alignment & 0xF;
+
+ // xmm_offset skips the frame, shadow space and final padding that sit below the XMM slots.
+ return ABIFrameInfo{static_cast<s32>(subtraction),
+ static_cast<s32>(subtraction - xmm_base_subtraction)};
+}
+
+/**
+ * Emits code that saves the registers in `regs` and lowers rsp by the computed frame size.
+ * GPRs are pushed in ascending index order, then rsp is adjusted, then the XMM registers
+ * are stored with movaps starting at the frame's xmm_offset.
+ *
+ * @param code Code generator receiving the instructions.
+ * @param regs Register set to save.
+ * @param rsp_alignment Forwarded to ABI_CalculateFrameSize.
+ * @param needed_frame_size Forwarded to ABI_CalculateFrameSize.
+ * @returns ABI_SHADOW_SPACE.
+ */
+inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
+ // Push the selected GPRs, lowest index first.
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.push(IndexToReg64(i));
+ }
+ }
+
+ if (frame_info.subtraction != 0) {
+ code.sub(code.rsp, frame_info.subtraction);
+ }
+
+ // Store the selected XMM registers in consecutive 16-byte slots.
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
+ frame_info.xmm_offset += 0x10;
+ }
+ }
+
+ return ABI_SHADOW_SPACE;
+}
+
+/**
+ * Emits code that undoes ABI_PushRegistersAndAdjustStack: XMM registers are reloaded from
+ * their slots, rsp is raised by the frame size, and the GPRs are popped in descending
+ * index order. The arguments must match those given to the push call so that the same
+ * frame layout is computed.
+ */
+inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+ size_t rsp_alignment, size_t needed_frame_size = 0) {
+ auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
+
+ // Reload the XMM registers in the same slot order in which they were stored.
+ for (std::size_t i = 0; i < regs.size(); ++i) {
+ if (regs[i] && ABI_ALL_XMMS[i]) {
+ code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
+ frame_info.xmm_offset += 0x10;
+ }
+ }
+
+ if (frame_info.subtraction != 0) {
+ code.add(code.rsp, frame_info.subtraction);
+ }
+
+ // GPRs need to be popped in reverse order
+ for (std::size_t j = 0; j < regs.size(); ++j) {
+ const std::size_t i = regs.size() - j - 1;
+ if (regs[i] && ABI_ALL_GPRS[i]) {
+ code.pop(IndexToReg64(i));
+ }
+ }
+}
+
+} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <xbyak.h>
+#include "common/x64/xbyak_abi.h"
+
+namespace Common::X64 {
+
+// Constants for use with cmpps/cmpss
+// These are the values of the instructions' comparison-predicate immediate operand.
+enum {
+ CMP_EQ = 0, // equal
+ CMP_LT = 1, // less than
+ CMP_LE = 2, // less than or equal
+ CMP_UNORD = 3, // unordered
+ CMP_NEQ = 4, // not equal
+ CMP_NLT = 5, // not less than
+ CMP_NLE = 6, // not less than or equal
+ CMP_ORD = 7, // ordered
+};
+
+/// Returns true when `target` lies within a signed 32-bit displacement of `ref + 5`.
+/// NOTE(review): the +5 is presumably the length of a rel32 call/jmp emitted at `ref`.
+constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
+    const u64 distance = target - (ref + 5);
+    // Small positive distances and wrapped-around (negative) distances are both reachable.
+    return distance < 0x8000'0000ULL || distance > ~0x8000'0000ULL;
+}
+
+/// Overload that measures the distance from the code generator's current emit position.
+inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
+    const auto emit_position = reinterpret_cast<uintptr_t>(code.getCurr());
+    return IsWithin2G(emit_position, target);
+}
+
+/// Emits a call to `f`: a direct call when the target is within rel32 range of the current
+/// emit position, otherwise an indirect call through ABI_RETURN.
+template <typename T>
+inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
+    static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
+    const size_t target = reinterpret_cast<size_t>(f);
+    if (!IsWithin2G(code, target)) {
+        // ABI_RETURN is a safe temp register to use before a call
+        code.mov(ABI_RETURN, target);
+        code.call(ABI_RETURN);
+        return;
+    }
+    code.call(f);
+}
+
+} // namespace Common::X64
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 8546d3602..f87d67db5 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -7,6 +7,16 @@ endif()
add_library(core STATIC
arm/arm_interface.h
arm/arm_interface.cpp
+ arm/cpu_interrupt_handler.cpp
+ arm/cpu_interrupt_handler.h
+ arm/dynarmic/arm_dynarmic_32.cpp
+ arm/dynarmic/arm_dynarmic_32.h
+ arm/dynarmic/arm_dynarmic_64.cpp
+ arm/dynarmic/arm_dynarmic_64.h
+ arm/dynarmic/arm_dynarmic_cp15.cpp
+ arm/dynarmic/arm_dynarmic_cp15.h
+ arm/dynarmic/arm_exclusive_monitor.cpp
+ arm/dynarmic/arm_exclusive_monitor.h
arm/exclusive_monitor.cpp
arm/exclusive_monitor.h
arm/unicorn/arm_unicorn.cpp
@@ -15,8 +25,6 @@ add_library(core STATIC
constants.h
core.cpp
core.h
- core_manager.cpp
- core_manager.h
core_timing.cpp
core_timing.h
core_timing_util.cpp
@@ -606,11 +614,11 @@ endif()
create_target_directory_groups(core)
target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
-target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt json-headers mbedtls opus unicorn)
+target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip)
if (YUZU_ENABLE_BOXCAT)
target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
- target_link_libraries(core PRIVATE httplib json-headers zip)
+ target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json)
endif()
if (ENABLE_WEB_SERVICE)
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index d079a1bc8..d2295ed90 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -139,6 +139,63 @@ std::optional<std::string> GetSymbolName(const Symbols& symbols, VAddr func_addr
constexpr u64 SEGMENT_BASE = 0x7100000000ull;
+/**
+ * Builds a backtrace from a saved AArch64 thread context by walking the frame-record chain
+ * that starts at x29 (fp), then resolves each entry against the loaded NSO modules.
+ *
+ * @param system System whose memory and application loader are queried.
+ * @param ctx Saved thread context providing the initial fp (x29) and lr (x30).
+ * @returns One entry per visited frame, or an empty vector if the module list cannot be read.
+ */
+std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktraceFromContext(
+ System& system, const ThreadContext64& ctx) {
+ std::vector<BacktraceEntry> out;
+ auto& memory = system.Memory();
+
+ auto fp = ctx.cpu_registers[29];
+ auto lr = ctx.cpu_registers[30];
+ // Follow the frame records until a null frame pointer is reached.
+ // NOTE(review): there is no bound on the walk; a cyclic fp chain would never terminate.
+ while (true) {
+ out.push_back({"", 0, lr, 0});
+ if (!fp) {
+ break;
+ }
+ // The saved lr is read from fp + 8 and moved back by 4 bytes (presumably one
+ // instruction, so the entry points at the call site - confirm); the previous fp is
+ // read from fp itself.
+ lr = memory.Read64(fp + 8) - 4;
+ fp = memory.Read64(fp);
+ }
+
+ std::map<VAddr, std::string> modules;
+ auto& loader{system.GetAppLoader()};
+ // Without module information the frames collected above are discarded.
+ if (loader.ReadNSOModules(modules) != Loader::ResultStatus::Success) {
+ return {};
+ }
+
+ // Symbol tables keyed by module name.
+ std::map<std::string, Symbols> symbols;
+ for (const auto& module : modules) {
+ symbols.insert_or_assign(module.second, GetSymbols(module.first, memory));
+ }
+
+ for (auto& entry : out) {
+ VAddr base = 0;
+ // Iterate from the highest base address down so that the first match is the module
+ // with the greatest base not above the entry's address.
+ for (auto iter = modules.rbegin(); iter != modules.rend(); ++iter) {
+ const auto& module{*iter};
+ if (entry.original_address >= module.first) {
+ entry.module = module.second;
+ base = module.first;
+ break;
+ }
+ }
+
+ // With no matching module, base stays 0 and offset equals the original address.
+ entry.offset = entry.original_address - base;
+ entry.address = SEGMENT_BASE + entry.offset;
+
+ if (entry.module.empty())
+ entry.module = "unknown";
+
+ const auto symbol_set = symbols.find(entry.module);
+ if (symbol_set != symbols.end()) {
+ const auto symbol = GetSymbolName(symbol_set->second, entry.offset);
+ if (symbol.has_value()) {
+ // TODO(DarkLordZach): Add demangling of symbol names.
+ entry.name = *symbol;
+ }
+ }
+ }
+
+ return out;
+}
+
std::vector<ARM_Interface::BacktraceEntry> ARM_Interface::GetBacktrace() const {
std::vector<BacktraceEntry> out;
auto& memory = system.Memory();
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index cb2e640e2..1f24051e4 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,6 +7,7 @@
#include <array>
#include <vector>
#include "common/common_types.h"
+#include "core/hardware_properties.h"
namespace Common {
struct PageTable;
@@ -18,25 +19,29 @@ enum class VMAPermission : u8;
namespace Core {
class System;
+class CPUInterruptHandler;
+
+using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;
/// Generic ARMv8 CPU interface
class ARM_Interface : NonCopyable {
public:
- explicit ARM_Interface(System& system_) : system{system_} {}
+ explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers, bool uses_wall_clock)
+ : system{system_}, interrupt_handlers{interrupt_handlers}, uses_wall_clock{
+ uses_wall_clock} {}
virtual ~ARM_Interface() = default;
struct ThreadContext32 {
std::array<u32, 16> cpu_registers{};
+ std::array<u32, 64> extension_registers{};
u32 cpsr{};
- std::array<u8, 4> padding{};
- std::array<u64, 32> fprs{};
u32 fpscr{};
u32 fpexc{};
u32 tpidr{};
};
// Internally within the kernel, it expects the AArch32 version of the
// thread context to be 336 bytes (0x150) in size.
- static_assert(sizeof(ThreadContext32) == 0x158);
+ static_assert(sizeof(ThreadContext32) == 0x150);
struct ThreadContext64 {
std::array<u64, 31> cpu_registers{};
@@ -143,6 +148,8 @@ public:
*/
virtual void SetTPIDR_EL0(u64 value) = 0;
+ virtual void ChangeProcessorID(std::size_t new_core_id) = 0;
+
virtual void SaveContext(ThreadContext32& ctx) = 0;
virtual void SaveContext(ThreadContext64& ctx) = 0;
virtual void LoadContext(const ThreadContext32& ctx) = 0;
@@ -162,6 +169,9 @@ public:
std::string name;
};
+ static std::vector<BacktraceEntry> GetBacktraceFromContext(System& system,
+ const ThreadContext64& ctx);
+
std::vector<BacktraceEntry> GetBacktrace() const;
/// fp (= r29) points to the last frame record.
@@ -175,6 +185,8 @@ public:
protected:
/// System context that this ARM interface is running under.
System& system;
+ CPUInterrupts& interrupt_handlers;
+ bool uses_wall_clock;
};
} // namespace Core
diff --git a/src/core/arm/cpu_interrupt_handler.cpp b/src/core/arm/cpu_interrupt_handler.cpp
new file mode 100644
index 000000000..2f1a1a269
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.cpp
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/thread.h"
+#include "core/arm/cpu_interrupt_handler.h"
+
+namespace Core {
+
+/// Creates a handler with the interrupt flag cleared and a freshly allocated event.
+CPUInterruptHandler::CPUInterruptHandler()
+    : is_interrupted{}, interrupt_event{std::make_unique<Common::Event>()} {}
+
+CPUInterruptHandler::~CPUInterruptHandler() = default;
+
+/// Updates the interrupt flag; raising it also signals the event so that a thread blocked
+/// in AwaitInterrupt() is released.
+void CPUInterruptHandler::SetInterrupt(bool is_interrupted_) {
+    const bool raising = is_interrupted_;
+    if (raising) {
+        // Signal first, then publish the flag, as the original ordering does.
+        interrupt_event->Set();
+    }
+    is_interrupted = is_interrupted_;
+}
+
+/// Waits on the interrupt event; SetInterrupt(true) is what signals it.
+void CPUInterruptHandler::AwaitInterrupt() {
+ interrupt_event->Wait();
+}
+
+} // namespace Core
diff --git a/src/core/arm/cpu_interrupt_handler.h b/src/core/arm/cpu_interrupt_handler.h
new file mode 100644
index 000000000..3d062d326
--- /dev/null
+++ b/src/core/arm/cpu_interrupt_handler.h
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+namespace Common {
+class Event;
+}
+
+namespace Core {
+
+/// Holds an interrupt flag together with an event used to wait for the flag to be raised.
+/// Movable but not copyable.
+class CPUInterruptHandler {
+public:
+ CPUInterruptHandler();
+ ~CPUInterruptHandler();
+
+ CPUInterruptHandler(const CPUInterruptHandler&) = delete;
+ CPUInterruptHandler& operator=(const CPUInterruptHandler&) = delete;
+
+ CPUInterruptHandler(CPUInterruptHandler&&) = default;
+ CPUInterruptHandler& operator=(CPUInterruptHandler&&) = default;
+
+ /// Returns the current value of the interrupt flag.
+ bool IsInterrupted() const {
+ return is_interrupted;
+ }
+
+ /// Sets the interrupt flag; see the definition for the event signalling it performs.
+ void SetInterrupt(bool is_interrupted);
+
+ /// Waits on the interrupt event.
+ void AwaitInterrupt();
+
+private:
+ // NOTE(review): plain bool; if SetInterrupt() and IsInterrupted() are called from
+ // different threads this needs synchronization - confirm with callers.
+ bool is_interrupted{};
+ std::unique_ptr<Common::Event> interrupt_event;
+};
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9..0d4ab95b7 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -7,15 +7,17 @@
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/config.h>
#include <dynarmic/A32/context.h>
-#include "common/microprofile.h"
+#include "common/logging/log.h"
+#include "common/page_table.h"
+#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/dynarmic/arm_dynarmic_32.h"
-#include "core/arm/dynarmic/arm_dynarmic_64.h"
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/hle/kernel/svc.h"
#include "core/memory.h"
+#include "core/settings.h"
namespace Core {
@@ -49,8 +51,22 @@ public:
parent.system.Memory().Write64(vaddr, value);
}
+ bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
+ return parent.system.Memory().WriteExclusive8(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
+ return parent.system.Memory().WriteExclusive16(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
+ return parent.system.Memory().WriteExclusive32(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
+ return parent.system.Memory().WriteExclusive64(vaddr, value, expected);
+ }
+
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
- UNIMPLEMENTED();
+ UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
+ MemoryReadCode(pc));
}
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -61,7 +77,7 @@ public:
case Dynarmic::A32::Exception::Breakpoint:
break;
}
- LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
+ LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
UNIMPLEMENTED();
}
@@ -71,26 +87,36 @@ public:
}
void AddTicks(u64 ticks) override {
+ if (parent.uses_wall_clock) {
+ return;
+ }
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
// devise a way so that timing is consistent across all cores without increasing the ticks 4
// times.
- u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
+ u64 amortized_ticks =
+ (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
parent.system.CoreTiming().AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
+
u64 GetTicksRemaining() override {
- return std::max(parent.system.CoreTiming().GetDowncount(), {});
+ if (parent.uses_wall_clock) {
+ if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+ return minimum_run_cycles;
+ }
+ return 0U;
+ }
+ return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
ARM_Dynarmic_32& parent;
std::size_t num_interpreted_instructions{};
- u64 tpidrro_el0{};
- u64 tpidr_el0{};
+ static constexpr u64 minimum_run_cycles = 1000U;
};
std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -99,26 +125,46 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
config.callbacks = cb.get();
// TODO(bunnei): Implement page table for 32-bit
// config.page_table = &page_table.pointers;
- config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
+ config.coprocessors[15] = cp15;
config.define_unpredictable_behaviour = true;
+ static constexpr std::size_t PAGE_BITS = 12;
+ static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
+ config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
+ page_table.pointers.data());
+ config.absolute_offset_page_table = true;
+ config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
+ config.only_detect_misalignment_via_page_table_on_page_boundary = true;
+
+ // Multi-process state
+ config.processor_id = core_index;
+ config.global_monitor = &exclusive_monitor.monitor;
+
+ // Timing
+ config.wall_clock_cntpct = uses_wall_clock;
+
+ // Optimizations
+ if (Settings::values.disable_cpu_opt) {
+ config.enable_optimizations = false;
+ config.enable_fast_dispatch = false;
+ }
+
return std::make_unique<Dynarmic::A32::Jit>(config);
}
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
-
void ARM_Dynarmic_32::Run() {
- MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);
jit->Run();
}
void ARM_Dynarmic_32::Step() {
- cb->InterpreterFallback(jit->Regs()[15], 1);
+ jit->Step();
}
-ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
+ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers,
+ bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
- : ARM_Interface{system},
- cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
+ : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
+ cb(std::make_unique<DynarmicCallbacks32>(*this)),
+ cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -154,32 +200,40 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
}
u64 ARM_Dynarmic_32::GetTlsAddress() const {
- return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
+ return cp15->uro;
}
void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
- CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
+ cp15->uro = static_cast<u32>(address);
}
u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
- return cb->tpidr_el0;
+ return cp15->uprw;
}
void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
- cb->tpidr_el0 = value;
+ cp15->uprw = static_cast<u32>(value);
+}
+
+void ARM_Dynarmic_32::ChangeProcessorID(std::size_t new_core_id) {
+ jit->ChangeProcessorID(new_core_id);
}
void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
Dynarmic::A32::Context context;
jit->SaveContext(context);
ctx.cpu_registers = context.Regs();
+ ctx.extension_registers = context.ExtRegs();
ctx.cpsr = context.Cpsr();
+ ctx.fpscr = context.Fpscr();
}
void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
Dynarmic::A32::Context context;
context.Regs() = ctx.cpu_registers;
+ context.ExtRegs() = ctx.extension_registers;
context.SetCpsr(ctx.cpsr);
+ context.SetFpscr(ctx.fpscr);
jit->LoadContext(context);
}
@@ -188,10 +242,15 @@ void ARM_Dynarmic_32::PrepareReschedule() {
}
void ARM_Dynarmic_32::ClearInstructionCache() {
+ if (!jit) {
+ return;
+ }
jit->ClearCache();
}
-void ARM_Dynarmic_32::ClearExclusiveState() {}
+void ARM_Dynarmic_32::ClearExclusiveState() {
+ jit->ClearExclusiveState();
+}
void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,
std::size_t new_address_space_size_in_bits) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f..2bab31b92 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -9,7 +9,7 @@
#include <dynarmic/A32/a32.h>
#include <dynarmic/A64/a64.h>
-#include <dynarmic/A64/exclusive_monitor.h>
+#include <dynarmic/exclusive_monitor.h>
#include "common/common_types.h"
#include "common/hash.h"
#include "core/arm/arm_interface.h"
@@ -21,13 +21,16 @@ class Memory;
namespace Core {
+class CPUInterruptHandler;
class DynarmicCallbacks32;
+class DynarmicCP15;
class DynarmicExclusiveMonitor;
class System;
class ARM_Dynarmic_32 final : public ARM_Interface {
public:
- ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+ ARM_Dynarmic_32(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+ ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
~ARM_Dynarmic_32() override;
void SetPC(u64 pc) override;
@@ -44,6 +47,7 @@ public:
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
+ void ChangeProcessorID(std::size_t new_core_id) override;
void SaveContext(ThreadContext32& ctx) override;
void SaveContext(ThreadContext64& ctx) override {}
@@ -66,12 +70,14 @@ private:
std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
friend class DynarmicCallbacks32;
+ friend class DynarmicCP15;
+
std::unique_ptr<DynarmicCallbacks32> cb;
JitCacheType jit_cache;
std::shared_ptr<Dynarmic::A32::Jit> jit;
+ std::shared_ptr<DynarmicCP15> cp15;
std::size_t core_index;
DynarmicExclusiveMonitor& exclusive_monitor;
- std::array<u32, 84> CP15_regs{};
};
} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 337b97be9..790981034 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -7,11 +7,11 @@
#include <dynarmic/A64/a64.h>
#include <dynarmic/A64/config.h>
#include "common/logging/log.h"
-#include "common/microprofile.h"
#include "common/page_table.h"
+#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
@@ -65,6 +65,22 @@ public:
memory.Write64(vaddr + 8, value[1]);
}
+ bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
+ return parent.system.Memory().WriteExclusive8(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
+ return parent.system.Memory().WriteExclusive16(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
+ return parent.system.Memory().WriteExclusive32(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
+ return parent.system.Memory().WriteExclusive64(vaddr, value, expected);
+ }
+ bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
+ return parent.system.Memory().WriteExclusive128(vaddr, value, expected);
+ }
+
void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
num_instructions, MemoryReadCode(pc));
@@ -98,8 +114,8 @@ public:
}
[[fallthrough]];
default:
- ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})",
- static_cast<std::size_t>(exception), pc);
+ ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
+ static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
}
}
@@ -108,29 +124,42 @@ public:
}
void AddTicks(u64 ticks) override {
+ if (parent.uses_wall_clock) {
+ return;
+ }
// Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
// rough approximation of the amount of executed ticks in the system, it may be thrown off
// if not all cores are doing a similar amount of work. Instead of doing this, we should
// devise a way so that timing is consistent across all cores without increasing the ticks 4
// times.
- u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
+ u64 amortized_ticks =
+ (ticks - num_interpreted_instructions) / Core::Hardware::NUM_CPU_CORES;
// Always execute at least one tick.
amortized_ticks = std::max<u64>(amortized_ticks, 1);
parent.system.CoreTiming().AddTicks(amortized_ticks);
num_interpreted_instructions = 0;
}
+
u64 GetTicksRemaining() override {
- return std::max(parent.system.CoreTiming().GetDowncount(), s64{0});
+ if (parent.uses_wall_clock) {
+ if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
+ return minimum_run_cycles;
+ }
+ return 0U;
+ }
+ return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
}
+
u64 GetCNTPCT() override {
- return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
+ return parent.system.CoreTiming().GetClockTicks();
}
ARM_Dynarmic_64& parent;
std::size_t num_interpreted_instructions = 0;
u64 tpidrro_el0 = 0;
u64 tpidr_el0 = 0;
+ static constexpr u64 minimum_run_cycles = 1000U;
};
std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
@@ -168,14 +197,13 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
config.enable_fast_dispatch = false;
}
+ // Timing
+ config.wall_clock_cntpct = uses_wall_clock;
+
return std::make_shared<Dynarmic::A64::Jit>(config);
}
-MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
-
void ARM_Dynarmic_64::Run() {
- MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64);
-
jit->Run();
}
@@ -183,11 +211,16 @@ void ARM_Dynarmic_64::Step() {
cb->InterpreterFallback(jit->GetPC(), 1);
}
-ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
+ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers,
+ bool uses_wall_clock, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
- : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks64>(*this)),
- inner_unicorn{system, ARM_Unicorn::Arch::AArch64}, core_index{core_index},
- exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
+ : ARM_Interface{system, interrupt_handlers, uses_wall_clock},
+ cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system, interrupt_handlers,
+ uses_wall_clock,
+ ARM_Unicorn::Arch::AArch64,
+ core_index},
+ core_index{core_index}, exclusive_monitor{
+ dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
@@ -239,6 +272,10 @@ void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
cb->tpidr_el0 = value;
}
+void ARM_Dynarmic_64::ChangeProcessorID(std::size_t new_core_id) {
+ jit->ChangeProcessorID(new_core_id);
+}
+
void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
ctx.cpu_registers = jit->GetRegisters();
ctx.sp = jit->GetSP();
@@ -266,6 +303,9 @@ void ARM_Dynarmic_64::PrepareReschedule() {
}
void ARM_Dynarmic_64::ClearInstructionCache() {
+ if (!jit) {
+ return;
+ }
jit->ClearCache();
}
@@ -285,44 +325,4 @@ void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,
jit_cache.emplace(key, jit);
}
-DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
- : monitor(core_count), memory{memory} {}
-
-DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
-
-void DynarmicExclusiveMonitor::SetExclusive(std::size_t core_index, VAddr addr) {
- // Size doesn't actually matter.
- monitor.Mark(core_index, addr, 16);
-}
-
-void DynarmicExclusiveMonitor::ClearExclusive() {
- monitor.Clear();
-}
-
-bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
- return monitor.DoExclusiveOperation(core_index, vaddr, 1, [&] { memory.Write8(vaddr, value); });
-}
-
-bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
- return monitor.DoExclusiveOperation(core_index, vaddr, 2,
- [&] { memory.Write16(vaddr, value); });
-}
-
-bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
- return monitor.DoExclusiveOperation(core_index, vaddr, 4,
- [&] { memory.Write32(vaddr, value); });
-}
-
-bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
- return monitor.DoExclusiveOperation(core_index, vaddr, 8,
- [&] { memory.Write64(vaddr, value); });
-}
-
-bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
- return monitor.DoExclusiveOperation(core_index, vaddr, 16, [&] {
- memory.Write64(vaddr + 0, value[0]);
- memory.Write64(vaddr + 8, value[1]);
- });
-}
-
} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 647cecaf0..403c55961 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -8,7 +8,6 @@
#include <unordered_map>
#include <dynarmic/A64/a64.h>
-#include <dynarmic/A64/exclusive_monitor.h>
#include "common/common_types.h"
#include "common/hash.h"
#include "core/arm/arm_interface.h"
@@ -22,12 +21,14 @@ class Memory;
namespace Core {
class DynarmicCallbacks64;
+class CPUInterruptHandler;
class DynarmicExclusiveMonitor;
class System;
class ARM_Dynarmic_64 final : public ARM_Interface {
public:
- ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
+ ARM_Dynarmic_64(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+ ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
~ARM_Dynarmic_64() override;
void SetPC(u64 pc) override;
@@ -44,6 +45,7 @@ public:
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
+ void ChangeProcessorID(std::size_t new_core_id) override;
void SaveContext(ThreadContext32& ctx) override {}
void SaveContext(ThreadContext64& ctx) override;
@@ -75,24 +77,4 @@ private:
DynarmicExclusiveMonitor& exclusive_monitor;
};
-class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
-public:
- explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
- ~DynarmicExclusiveMonitor() override;
-
- void SetExclusive(std::size_t core_index, VAddr addr) override;
- void ClearExclusive() override;
-
- bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
- bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
- bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
- bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
- bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
-
-private:
- friend class ARM_Dynarmic_64;
- Dynarmic::A64::ExclusiveMonitor monitor;
- Core::Memory::Memory& memory;
-};
-
} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
index 3fdcdebde..54556e0f9 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,79 +2,132 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <fmt/format.h>
+#include "common/logging/log.h"
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
using Callback = Dynarmic::A32::Coprocessor::Callback;
using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
+template <>
+struct fmt::formatter<Dynarmic::A32::CoprocReg> {
+ constexpr auto parse(format_parse_context& ctx) {
+ return ctx.begin();
+ }
+ template <typename FormatContext>
+ auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
+ return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
+ }
+};
+
+namespace Core {
+
+static u32 dummy_value;
+
std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
CoprocReg CRd, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
+ LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
+ CRm, opc2);
return {};
}
CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
- // TODO(merry): Privileged CP15 registers
-
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
+ // CP15_FLUSH_PREFETCH_BUFFER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
+ return &dummy_value;
}
if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
switch (opc2) {
case 4:
+ // CP15_DATA_SYNC_BARRIER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
+ return &dummy_value;
case 5:
+ // CP15_DATA_MEMORY_BARRIER
// This is a dummy write, we ignore the value written here.
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
- default:
- return {};
+ return &dummy_value;
}
}
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
+ // CP15_THREAD_UPRW
+ return &uprw;
}
+ LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
+ opc2);
return {};
}
CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
+ LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
return {};
}
CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
CoprocReg CRm, unsigned opc2) {
- // TODO(merry): Privileged CP15 registers
-
if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
switch (opc2) {
case 2:
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
+ // CP15_THREAD_UPRW
+ return &uprw;
case 3:
- return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
- default:
- return {};
+ // CP15_THREAD_URO
+ return &uro;
}
}
+ LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
+ opc2);
return {};
}
CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
+ if (!two && opc == 0 && CRm == CoprocReg::C14) {
+ // CNTPCT
+ const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
+ [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
+ ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
+ return parent.system.CoreTiming().GetClockTicks();
+ });
+ return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
+ }
+
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
return {};
}
std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) {
+ if (option) {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
+ long_transfer ? "l" : "", CRd, *option);
+ } else {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
+ long_transfer ? "l" : "", CRd);
+ }
return {};
}
std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) {
+ if (option) {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
+ long_transfer ? "l" : "", CRd, *option);
+ } else {
+ LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
+ long_transfer ? "l" : "", CRd);
+ }
return {};
}
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 07bcde5f9..7356d252e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,128 +10,15 @@
#include <dynarmic/A32/coprocessor.h>
#include "common/common_types.h"
-enum class CP15Register {
- // c0 - Information registers
- CP15_MAIN_ID,
- CP15_CACHE_TYPE,
- CP15_TCM_STATUS,
- CP15_TLB_TYPE,
- CP15_CPU_ID,
- CP15_PROCESSOR_FEATURE_0,
- CP15_PROCESSOR_FEATURE_1,
- CP15_DEBUG_FEATURE_0,
- CP15_AUXILIARY_FEATURE_0,
- CP15_MEMORY_MODEL_FEATURE_0,
- CP15_MEMORY_MODEL_FEATURE_1,
- CP15_MEMORY_MODEL_FEATURE_2,
- CP15_MEMORY_MODEL_FEATURE_3,
- CP15_ISA_FEATURE_0,
- CP15_ISA_FEATURE_1,
- CP15_ISA_FEATURE_2,
- CP15_ISA_FEATURE_3,
- CP15_ISA_FEATURE_4,
+namespace Core {
- // c1 - Control registers
- CP15_CONTROL,
- CP15_AUXILIARY_CONTROL,
- CP15_COPROCESSOR_ACCESS_CONTROL,
-
- // c2 - Translation table registers
- CP15_TRANSLATION_BASE_TABLE_0,
- CP15_TRANSLATION_BASE_TABLE_1,
- CP15_TRANSLATION_BASE_CONTROL,
- CP15_DOMAIN_ACCESS_CONTROL,
- CP15_RESERVED,
-
- // c5 - Fault status registers
- CP15_FAULT_STATUS,
- CP15_INSTR_FAULT_STATUS,
- CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
- CP15_INST_FSR,
-
- // c6 - Fault Address registers
- CP15_FAULT_ADDRESS,
- CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
- CP15_WFAR,
- CP15_IFAR,
-
- // c7 - Cache operation registers
- CP15_WAIT_FOR_INTERRUPT,
- CP15_PHYS_ADDRESS,
- CP15_INVALIDATE_INSTR_CACHE,
- CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
- CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
- CP15_FLUSH_PREFETCH_BUFFER,
- CP15_FLUSH_BRANCH_TARGET_CACHE,
- CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
- CP15_INVALIDATE_DATA_CACHE,
- CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
- CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
- CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
- CP15_CLEAN_DATA_CACHE,
- CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
- CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
- CP15_DATA_SYNC_BARRIER,
- CP15_DATA_MEMORY_BARRIER,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
- CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
-
- // c8 - TLB operations
- CP15_INVALIDATE_ITLB,
- CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
- CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
- CP15_INVALIDATE_DTLB,
- CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
- CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
- CP15_INVALIDATE_UTLB,
- CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
- CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
- CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
-
- // c9 - Data cache lockdown register
- CP15_DATA_CACHE_LOCKDOWN,
-
- // c10 - TLB/Memory map registers
- CP15_TLB_LOCKDOWN,
- CP15_PRIMARY_REGION_REMAP,
- CP15_NORMAL_REGION_REMAP,
-
- // c13 - Thread related registers
- CP15_PID,
- CP15_CONTEXT_ID,
- CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
- CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
- CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
-
- // c15 - Performance and TLB lockdown registers
- CP15_PERFORMANCE_MONITOR_CONTROL,
- CP15_CYCLE_COUNTER,
- CP15_COUNT_0,
- CP15_COUNT_1,
- CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
- CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
- CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
- CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
- CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
- CP15_TLB_DEBUG_CONTROL,
-
- // Skyeye defined
- CP15_TLB_FAULT_ADDR,
- CP15_TLB_FAULT_STATUS,
-
- // Not an actual register.
- // All registers should be defined above this.
- CP15_REGISTER_COUNT,
-};
+class ARM_Dynarmic_32;
class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
public:
using CoprocReg = Dynarmic::A32::CoprocReg;
- explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
+ explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
CoprocReg CRn, CoprocReg CRm,
@@ -147,6 +34,9 @@ public:
std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
std::optional<u8> option) override;
-private:
- u32* CP15{};
+ ARM_Dynarmic_32& parent;
+ u32 uprw;
+ u32 uro;
};
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.cpp b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
new file mode 100644
index 000000000..4e209f6a5
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.cpp
@@ -0,0 +1,76 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cinttypes>
+#include <memory>
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
+#include "core/memory.h"
+
+namespace Core {
+
+DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
+ : monitor(core_count), memory{memory} {}
+
+DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
+
+u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
+ return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
+}
+
+u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
+ return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
+}
+
+u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
+ return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
+}
+
+u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
+ return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
+}
+
+u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) {
+ return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 {
+ u128 result;
+ result[0] = memory.Read64(addr);
+ result[1] = memory.Read64(addr + 8);
+ return result;
+ });
+}
+
+void DynarmicExclusiveMonitor::ClearExclusive() {
+ monitor.Clear();
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
+ return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
+ return memory.WriteExclusive8(vaddr, value, expected);
+ });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
+ return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
+ return memory.WriteExclusive16(vaddr, value, expected);
+ });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
+ return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
+ return memory.WriteExclusive32(vaddr, value, expected);
+ });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
+ return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
+ return memory.WriteExclusive64(vaddr, value, expected);
+ });
+}
+
+bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
+ return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool {
+ return memory.WriteExclusive128(vaddr, value, expected);
+ });
+}
+
+} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_exclusive_monitor.h b/src/core/arm/dynarmic/arm_exclusive_monitor.h
new file mode 100644
index 000000000..964f4a55d
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_exclusive_monitor.h
@@ -0,0 +1,48 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+
+#include <dynarmic/exclusive_monitor.h>
+
+#include "common/common_types.h"
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#include "core/arm/exclusive_monitor.h"
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Core {
+
+class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
+public:
+ explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
+ ~DynarmicExclusiveMonitor() override;
+
+ u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
+ u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
+ u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
+ u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
+ u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
+ void ClearExclusive() override;
+
+ bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
+ bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
+ bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
+ bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
+ bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
+
+private:
+ friend class ARM_Dynarmic_32;
+ friend class ARM_Dynarmic_64;
+ Dynarmic::ExclusiveMonitor monitor;
+ Core::Memory::Memory& memory;
+};
+
+} // namespace Core
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp
index b32401e0b..d8cba369d 100644
--- a/src/core/arm/exclusive_monitor.cpp
+++ b/src/core/arm/exclusive_monitor.cpp
@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/memory.h"
diff --git a/src/core/arm/exclusive_monitor.h b/src/core/arm/exclusive_monitor.h
index ccd73b80f..62f6e6023 100644
--- a/src/core/arm/exclusive_monitor.h
+++ b/src/core/arm/exclusive_monitor.h
@@ -18,7 +18,11 @@ class ExclusiveMonitor {
public:
virtual ~ExclusiveMonitor();
- virtual void SetExclusive(std::size_t core_index, VAddr addr) = 0;
+ virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
+ virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
+ virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
+ virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
+ virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
virtual void ClearExclusive() = 0;
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index e40e9626a..1df3f3ed1 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -6,6 +6,7 @@
#include <unicorn/arm64.h>
#include "common/assert.h"
#include "common/microprofile.h"
+#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_timing.h"
@@ -62,7 +63,9 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
return false;
}
-ARM_Unicorn::ARM_Unicorn(System& system, Arch architecture) : ARM_Interface{system} {
+ARM_Unicorn::ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+ Arch architecture, std::size_t core_index)
+ : ARM_Interface{system, interrupt_handlers, uses_wall_clock}, core_index{core_index} {
const auto arch = architecture == Arch::AArch32 ? UC_ARCH_ARM : UC_ARCH_ARM64;
CHECKED(uc_open(arch, UC_MODE_ARM, &uc));
@@ -156,12 +159,20 @@ void ARM_Unicorn::SetTPIDR_EL0(u64 value) {
CHECKED(uc_reg_write(uc, UC_ARM64_REG_TPIDR_EL0, &value));
}
+void ARM_Unicorn::ChangeProcessorID(std::size_t new_core_id) {
+ core_index = new_core_id;
+}
+
void ARM_Unicorn::Run() {
if (GDBStub::IsServerEnabled()) {
ExecuteInstructions(std::max(4000000U, 0U));
} else {
- ExecuteInstructions(
- std::max(std::size_t(system.CoreTiming().GetDowncount()), std::size_t{0}));
+ while (true) {
+ if (interrupt_handlers[core_index].IsInterrupted()) {
+ return;
+ }
+ ExecuteInstructions(10);
+ }
}
}
@@ -183,8 +194,6 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
UC_PROT_READ | UC_PROT_WRITE | UC_PROT_EXEC, page_buffer.data()));
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
CHECKED(uc_mem_unmap(uc, map_addr, page_buffer.size()));
-
- system.CoreTiming().AddTicks(num_instructions);
if (GDBStub::IsServerEnabled()) {
if (last_bkpt_hit && last_bkpt.type == GDBStub::BreakpointType::Execute) {
uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 725c65085..810aff311 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -20,7 +20,8 @@ public:
AArch64, // 64-bit ARM
};
- explicit ARM_Unicorn(System& system, Arch architecture);
+ explicit ARM_Unicorn(System& system, CPUInterrupts& interrupt_handlers, bool uses_wall_clock,
+ Arch architecture, std::size_t core_index);
~ARM_Unicorn() override;
void SetPC(u64 pc) override;
@@ -35,6 +36,7 @@ public:
void SetTlsAddress(VAddr address) override;
void SetTPIDR_EL0(u64 value) override;
u64 GetTPIDR_EL0() const override;
+ void ChangeProcessorID(std::size_t new_core_id) override;
void PrepareReschedule() override;
void ClearExclusiveState() override;
void ExecuteInstructions(std::size_t num_instructions);
@@ -55,6 +57,7 @@ private:
uc_engine* uc{};
GDBStub::BreakpointAddress last_bkpt{};
bool last_bkpt_hit = false;
+ std::size_t core_index;
};
} // namespace Core
diff --git a/src/core/core.cpp b/src/core/core.cpp
index f9f8a3000..1a243c515 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -8,10 +8,10 @@
#include "common/file_util.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/cpu_manager.h"
#include "core/device_memory.h"
@@ -51,6 +51,11 @@
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU0, "ARM JIT", "Dynarmic CPU 0", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU1, "ARM JIT", "Dynarmic CPU 1", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU2, "ARM JIT", "Dynarmic CPU 2", MP_RGB(255, 64, 64));
+MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_CPU3, "ARM JIT", "Dynarmic CPU 3", MP_RGB(255, 64, 64));
+
namespace Core {
namespace {
@@ -117,23 +122,22 @@ struct System::Impl {
: kernel{system}, fs_controller{system}, memory{system},
cpu_manager{system}, reporter{system}, applet_manager{system} {}
- CoreManager& CurrentCoreManager() {
- return cpu_manager.GetCurrentCoreManager();
- }
+ ResultStatus Run() {
+ status = ResultStatus::Success;
- Kernel::PhysicalCore& CurrentPhysicalCore() {
- const auto index = cpu_manager.GetActiveCoreIndex();
- return kernel.PhysicalCore(index);
- }
+ kernel.Suspend(false);
+ core_timing.SyncPause(false);
+ cpu_manager.Pause(false);
- Kernel::PhysicalCore& GetPhysicalCore(std::size_t index) {
- return kernel.PhysicalCore(index);
+ return status;
}
- ResultStatus RunLoop(bool tight_loop) {
+ ResultStatus Pause() {
status = ResultStatus::Success;
- cpu_manager.RunLoop(tight_loop);
+ core_timing.SyncPause(true);
+ kernel.Suspend(true);
+ cpu_manager.Pause(true);
return status;
}
@@ -143,7 +147,15 @@ struct System::Impl {
device_memory = std::make_unique<Core::DeviceMemory>(system);
- core_timing.Initialize();
+ is_multicore = Settings::values.use_multi_core;
+ is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation;
+
+ kernel.SetMulticore(is_multicore);
+ cpu_manager.SetMulticore(is_multicore);
+ cpu_manager.SetAsyncGpu(is_async_gpu);
+ core_timing.SetMulticore(is_multicore);
+
+ core_timing.Initialize([&system]() { system.RegisterHostThread(); });
kernel.Initialize();
cpu_manager.Initialize();
@@ -180,6 +192,11 @@ struct System::Impl {
is_powered_on = true;
exit_lock = false;
+ microprofile_dynarmic[0] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU0);
+ microprofile_dynarmic[1] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU1);
+ microprofile_dynarmic[2] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU2);
+ microprofile_dynarmic[3] = MICROPROFILE_TOKEN(ARM_Jit_Dynarmic_CPU3);
+
LOG_DEBUG(Core, "Initialized OK");
return ResultStatus::Success;
@@ -277,8 +294,6 @@ struct System::Impl {
service_manager.reset();
cheat_engine.reset();
telemetry_session.reset();
- perf_stats.reset();
- gpu_core.reset();
device_memory.reset();
// Close all CPU/threading state
@@ -290,6 +305,8 @@ struct System::Impl {
// Close app loader
app_loader.reset();
+ gpu_core.reset();
+ perf_stats.reset();
// Clear all applets
applet_manager.ClearAll();
@@ -382,25 +399,35 @@ struct System::Impl {
std::unique_ptr<Core::PerfStats> perf_stats;
Core::FrameLimiter frame_limiter;
+
+ bool is_multicore{};
+ bool is_async_gpu{};
+
+ std::array<u64, Core::Hardware::NUM_CPU_CORES> dynarmic_ticks{};
+ std::array<MicroProfileToken, Core::Hardware::NUM_CPU_CORES> microprofile_dynarmic{};
};
System::System() : impl{std::make_unique<Impl>(*this)} {}
System::~System() = default;
-CoreManager& System::CurrentCoreManager() {
- return impl->CurrentCoreManager();
+CpuManager& System::GetCpuManager() {
+ return impl->cpu_manager;
+}
+
+const CpuManager& System::GetCpuManager() const {
+ return impl->cpu_manager;
}
-const CoreManager& System::CurrentCoreManager() const {
- return impl->CurrentCoreManager();
+System::ResultStatus System::Run() {
+ return impl->Run();
}
-System::ResultStatus System::RunLoop(bool tight_loop) {
- return impl->RunLoop(tight_loop);
+System::ResultStatus System::Pause() {
+ return impl->Pause();
}
System::ResultStatus System::SingleStep() {
- return RunLoop(false);
+ return ResultStatus::Success;
}
void System::InvalidateCpuInstructionCaches() {
@@ -416,7 +443,7 @@ bool System::IsPoweredOn() const {
}
void System::PrepareReschedule() {
- impl->CurrentPhysicalCore().Stop();
+ // Deprecated, does nothing, kept for backward compatibility.
}
void System::PrepareReschedule(const u32 core_index) {
@@ -436,31 +463,41 @@ const TelemetrySession& System::TelemetrySession() const {
}
ARM_Interface& System::CurrentArmInterface() {
- return impl->CurrentPhysicalCore().ArmInterface();
+ return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
}
const ARM_Interface& System::CurrentArmInterface() const {
- return impl->CurrentPhysicalCore().ArmInterface();
+ return impl->kernel.CurrentScheduler().GetCurrentThread()->ArmInterface();
}
std::size_t System::CurrentCoreIndex() const {
- return impl->cpu_manager.GetActiveCoreIndex();
+ std::size_t core = impl->kernel.GetCurrentHostThreadID();
+ ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+ return core;
}
Kernel::Scheduler& System::CurrentScheduler() {
- return impl->CurrentPhysicalCore().Scheduler();
+ return impl->kernel.CurrentScheduler();
}
const Kernel::Scheduler& System::CurrentScheduler() const {
- return impl->CurrentPhysicalCore().Scheduler();
+ return impl->kernel.CurrentScheduler();
+}
+
+Kernel::PhysicalCore& System::CurrentPhysicalCore() {
+ return impl->kernel.CurrentPhysicalCore();
+}
+
+const Kernel::PhysicalCore& System::CurrentPhysicalCore() const {
+ return impl->kernel.CurrentPhysicalCore();
}
Kernel::Scheduler& System::Scheduler(std::size_t core_index) {
- return impl->GetPhysicalCore(core_index).Scheduler();
+ return impl->kernel.Scheduler(core_index);
}
const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const {
- return impl->GetPhysicalCore(core_index).Scheduler();
+ return impl->kernel.Scheduler(core_index);
}
/// Gets the global scheduler
@@ -490,20 +527,15 @@ const Kernel::Process* System::CurrentProcess() const {
}
ARM_Interface& System::ArmInterface(std::size_t core_index) {
- return impl->GetPhysicalCore(core_index).ArmInterface();
+ auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+ ASSERT(thread && !thread->IsHLEThread());
+ return thread->ArmInterface();
}
const ARM_Interface& System::ArmInterface(std::size_t core_index) const {
- return impl->GetPhysicalCore(core_index).ArmInterface();
-}
-
-CoreManager& System::GetCoreManager(std::size_t core_index) {
- return impl->cpu_manager.GetCoreManager(core_index);
-}
-
-const CoreManager& System::GetCoreManager(std::size_t core_index) const {
- ASSERT(core_index < NUM_CPU_CORES);
- return impl->cpu_manager.GetCoreManager(core_index);
+ auto* thread = impl->kernel.Scheduler(core_index).GetCurrentThread();
+ ASSERT(thread && !thread->IsHLEThread());
+ return thread->ArmInterface();
}
ExclusiveMonitor& System::Monitor() {
@@ -722,4 +754,18 @@ void System::RegisterHostThread() {
impl->kernel.RegisterHostThread();
}
+/// Opens the Dynarmic MicroProfile scope for the calling host thread and stores the returned
+/// tick in that thread's slot so ExitDynarmicProfile() can close the scope.
+void System::EnterDynarmicProfile() {
+ const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+ // The ID indexes the per-core profiling arrays below; apply the same range check this file
+ // already uses on GetCurrentHostThreadID() results.
+ ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+ impl->dynarmic_ticks[core] = MicroProfileEnter(impl->microprofile_dynarmic[core]);
+}
+
+/// Closes the Dynarmic MicroProfile scope for the calling host thread, using the tick that
+/// was stored in that thread's slot when the scope was entered.
+void System::ExitDynarmicProfile() {
+ const std::size_t core = impl->kernel.GetCurrentHostThreadID();
+ // The ID indexes the per-core profiling arrays below; apply the same range check this file
+ // already uses on GetCurrentHostThreadID() results.
+ ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+ MicroProfileLeave(impl->microprofile_dynarmic[core], impl->dynarmic_ticks[core]);
+}
+
+bool System::IsMulticore() const {
+ return impl->is_multicore;
+}
+
} // namespace Core
diff --git a/src/core/core.h b/src/core/core.h
index acc53d6a1..5c6cfbffe 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -27,6 +27,7 @@ class VfsFilesystem;
namespace Kernel {
class GlobalScheduler;
class KernelCore;
+class PhysicalCore;
class Process;
class Scheduler;
} // namespace Kernel
@@ -90,7 +91,7 @@ class InterruptManager;
namespace Core {
class ARM_Interface;
-class CoreManager;
+class CpuManager;
class DeviceMemory;
class ExclusiveMonitor;
class FrameLimiter;
@@ -136,16 +137,16 @@ public:
};
/**
- * Run the core CPU loop
- * This function runs the core for the specified number of CPU instructions before trying to
- * update hardware. This is much faster than SingleStep (and should be equivalent), as the CPU
- * is not required to do a full dispatch with each instruction. NOTE: the number of instructions
- * requested is not guaranteed to run, as this will be interrupted preemptively if a hardware
- * update is requested (e.g. on a thread switch).
- * @param tight_loop If false, the CPU single-steps.
- * @return Result status, indicating whether or not the operation succeeded.
+ * Run the OS and Application
+ * This function will start emulation and run the relevant devices
+ */
+ ResultStatus Run();
+
+ /**
+ * Pause the OS and Application
+ * This function will pause emulation and stop the relevant devices
*/
- ResultStatus RunLoop(bool tight_loop = true);
+ ResultStatus Pause();
/**
* Step the CPU one instruction
@@ -209,17 +210,21 @@ public:
/// Gets the scheduler for the CPU core that is currently running
const Kernel::Scheduler& CurrentScheduler() const;
+ /// Gets the physical core for the CPU core that is currently running
+ Kernel::PhysicalCore& CurrentPhysicalCore();
+
+ /// Gets the physical core for the CPU core that is currently running
+ const Kernel::PhysicalCore& CurrentPhysicalCore() const;
+
/// Gets a reference to an ARM interface for the CPU core with the specified index
ARM_Interface& ArmInterface(std::size_t core_index);
/// Gets a const reference to an ARM interface from the CPU core with the specified index
const ARM_Interface& ArmInterface(std::size_t core_index) const;
- /// Gets a CPU interface to the CPU core with the specified index
- CoreManager& GetCoreManager(std::size_t core_index);
+ CpuManager& GetCpuManager();
- /// Gets a CPU interface to the CPU core with the specified index
- const CoreManager& GetCoreManager(std::size_t core_index) const;
+ const CpuManager& GetCpuManager() const;
/// Gets a reference to the exclusive monitor
ExclusiveMonitor& Monitor();
@@ -370,14 +375,17 @@ public:
/// Register a host thread as an auxiliary thread.
void RegisterHostThread();
-private:
- System();
+ /// Enter Dynarmic Microprofile
+ void EnterDynarmicProfile();
+
+ /// Exit Dynarmic Microprofile
+ void ExitDynarmicProfile();
- /// Returns the currently running CPU core
- CoreManager& CurrentCoreManager();
+ /// Returns whether the system is running in multicore mode.
+ bool IsMulticore() const;
- /// Returns the currently running CPU core
- const CoreManager& CurrentCoreManager() const;
+private:
+ System();
/**
* Initialize the emulated system.
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
deleted file mode 100644
index b6b797c80..000000000
--- a/src/core/core_manager.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <condition_variable>
-#include <mutex>
-
-#include "common/logging/log.h"
-#include "core/arm/exclusive_monitor.h"
-#include "core/arm/unicorn/arm_unicorn.h"
-#include "core/core.h"
-#include "core/core_manager.h"
-#include "core/core_timing.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/physical_core.h"
-#include "core/hle/kernel/scheduler.h"
-#include "core/hle/kernel/thread.h"
-#include "core/hle/lock.h"
-#include "core/settings.h"
-
-namespace Core {
-
-CoreManager::CoreManager(System& system, std::size_t core_index)
- : global_scheduler{system.GlobalScheduler()}, physical_core{system.Kernel().PhysicalCore(
- core_index)},
- core_timing{system.CoreTiming()}, core_index{core_index} {}
-
-CoreManager::~CoreManager() = default;
-
-void CoreManager::RunLoop(bool tight_loop) {
- Reschedule();
-
- // If we don't have a currently active thread then don't execute instructions,
- // instead advance to the next event and try to yield to the next thread
- if (Kernel::GetCurrentThread() == nullptr) {
- LOG_TRACE(Core, "Core-{} idling", core_index);
- core_timing.Idle();
- } else {
- if (tight_loop) {
- physical_core.Run();
- } else {
- physical_core.Step();
- }
- }
- core_timing.Advance();
-
- Reschedule();
-}
-
-void CoreManager::SingleStep() {
- return RunLoop(false);
-}
-
-void CoreManager::PrepareReschedule() {
- physical_core.Stop();
-}
-
-void CoreManager::Reschedule() {
- // Lock the global kernel mutex when we manipulate the HLE state
- std::lock_guard lock(HLE::g_hle_lock);
-
- global_scheduler.SelectThread(core_index);
-
- physical_core.Scheduler().TryDoContextSwitch();
-}
-
-} // namespace Core
diff --git a/src/core/core_manager.h b/src/core/core_manager.h
deleted file mode 100644
index d525de00a..000000000
--- a/src/core/core_manager.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <atomic>
-#include <cstddef>
-#include <memory>
-#include "common/common_types.h"
-
-namespace Kernel {
-class GlobalScheduler;
-class PhysicalCore;
-} // namespace Kernel
-
-namespace Core {
-class System;
-}
-
-namespace Core::Timing {
-class CoreTiming;
-}
-
-namespace Core::Memory {
-class Memory;
-}
-
-namespace Core {
-
-constexpr unsigned NUM_CPU_CORES{4};
-
-class CoreManager {
-public:
- CoreManager(System& system, std::size_t core_index);
- ~CoreManager();
-
- void RunLoop(bool tight_loop = true);
-
- void SingleStep();
-
- void PrepareReschedule();
-
- bool IsMainCore() const {
- return core_index == 0;
- }
-
- std::size_t CoreIndex() const {
- return core_index;
- }
-
-private:
- void Reschedule();
-
- Kernel::GlobalScheduler& global_scheduler;
- Kernel::PhysicalCore& physical_core;
- Timing::CoreTiming& core_timing;
-
- std::atomic<bool> reschedule_pending = false;
- std::size_t core_index;
-};
-
-} // namespace Core
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 46d4178c4..5c83c41a4 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -1,29 +1,27 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include "core/core_timing.h"
-
#include <algorithm>
#include <mutex>
#include <string>
#include <tuple>
#include "common/assert.h"
-#include "common/thread.h"
+#include "common/microprofile.h"
+#include "core/core_timing.h"
#include "core/core_timing_util.h"
-#include "core/hardware_properties.h"
namespace Core::Timing {
-constexpr int MAX_SLICE_LENGTH = 10000;
+constexpr u64 MAX_SLICE_LENGTH = 4000;
std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
return std::make_shared<EventType>(std::move(callback), std::move(name));
}
struct CoreTiming::Event {
- s64 time;
+ u64 time;
u64 fifo_order;
u64 userdata;
std::weak_ptr<EventType> type;
@@ -39,51 +37,90 @@ struct CoreTiming::Event {
}
};
-CoreTiming::CoreTiming() = default;
-CoreTiming::~CoreTiming() = default;
+// Creates the wall clock from the emulated base clock rate and the CNTFREQ counter frequency.
+CoreTiming::CoreTiming()
+ : clock{Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE,
+ Core::Hardware::CNTFREQ)} {}
-void CoreTiming::Initialize() {
- downcounts.fill(MAX_SLICE_LENGTH);
- time_slice.fill(MAX_SLICE_LENGTH);
- slice_length = MAX_SLICE_LENGTH;
- global_timer = 0;
- idled_cycles = 0;
- current_context = 0;
+CoreTiming::~CoreTiming() = default;
- // The time between CoreTiming being initialized and the first call to Advance() is considered
- // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
- // executing the first cycle of each slice to prepare the slice length and downcount for
- // that slice.
- is_global_timer_sane = true;
+// Entry point of the timer thread: names and prioritises the thread, runs the user-supplied
+// initialisation callback, then enters the timing loop.
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+ constexpr char thread_name[] = "yuzu:HostTiming";
+ // Register the thread under the same name with the profiler and with the OS.
+ MicroProfileOnThreadCreate(thread_name);
+ Common::SetCurrentThreadName(thread_name);
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
+ // The callback stored by Initialize() runs on the timer thread before any event is handled.
+ instance.on_thread_init();
+ instance.ThreadLoop();
+}
+void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
+ on_thread_init = std::move(on_thread_init_);
event_fifo_id = 0;
-
+ shutting_down = false;
+ ticks = 0;
const auto empty_timed_callback = [](u64, s64) {};
ev_lost = CreateEvent("_lost_event", empty_timed_callback);
+ if (is_multicore) {
+ timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+ }
}
void CoreTiming::Shutdown() {
+ paused = true;
+ shutting_down = true;
+ pause_event.Set();
+ event.Set();
+ if (timer_thread) {
+ timer_thread->join();
+ }
ClearPendingEvents();
+ timer_thread.reset();
+ has_started = false;
}
-void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
- u64 userdata) {
- std::lock_guard guard{inner_mutex};
- const s64 timeout = GetTicks() + cycles_into_future;
+void CoreTiming::Pause(bool is_paused) {
+ paused = is_paused;
+ pause_event.Set();
+}
- // If this event needs to be scheduled before the next advance(), force one early
- if (!is_global_timer_sane) {
- ForceExceptionCheck(cycles_into_future);
+// Requests the given pause state and, when a timer thread exists, blocks until that thread
+// has acknowledged it through paused_set.
+void CoreTiming::SyncPause(bool is_paused) {
+ // Nothing to do when the requested state is already both set and acknowledged.
+ if (is_paused == paused && paused_set == paused) {
+ return;
+ }
+ Pause(is_paused);
+ if (timer_thread) {
+ if (!is_paused) {
+ pause_event.Set();
+ }
+ // Wake the timer thread in case it is waiting on the timing event.
+ event.Set();
+ // Wait for the acknowledgement, yielding the host core between checks instead of
+ // spinning on the atomic in a tight empty loop.
+ while (paused_set != is_paused) {
+ std::this_thread::yield();
+ }
}
+}
- event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+bool CoreTiming::IsRunning() const {
+ return !paused_set;
+}
- std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+// Reports false only when the timer thread is parked in its indefinite wait (wait_set) and the
+// event queue is empty.
+// NOTE(review): event_queue is read here without taking basic_lock — confirm callers tolerate
+// a stale answer.
+bool CoreTiming::HasPendingEvents() const {
+ return !wait_set || !event_queue.empty();
}
-void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
- std::lock_guard guard{inner_mutex};
+// Queues event_type to fire ns_into_future nanoseconds after the current global time and then
+// signals the timing event.
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+ u64 userdata) {
+ {
+ // The heap is only touched while basic_lock is held; the lock is released before
+ // signalling below.
+ std::scoped_lock guard{basic_lock};
+ const auto now_ns = GetGlobalTimeNs().count();
+ const u64 deadline = static_cast<u64>(now_ns + ns_into_future);
+
+ event_queue.push_back(Event{deadline, event_fifo_id++, userdata, event_type});
+ std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+ }
+ // Signal the event the timer loop waits on so it re-evaluates the queue.
+ event.Set();
+}
+
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
+ std::scoped_lock scope{basic_lock};
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get() && e.userdata == userdata;
});
@@ -95,21 +132,39 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
}
}
-u64 CoreTiming::GetTicks() const {
- u64 ticks = static_cast<u64>(global_timer);
- if (!is_global_timer_sane) {
- ticks += accumulated_ticks;
+void CoreTiming::AddTicks(u64 ticks) {
+ this->ticks += ticks;
+ downcount -= ticks;
+}
+
+void CoreTiming::Idle() {
+ if (!event_queue.empty()) {
+ const u64 next_event_time = event_queue.front().time;
+ const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+ if (next_ticks > ticks) {
+ ticks = next_ticks;
+ }
+ return;
}
- return ticks;
+ ticks += 1000U;
}
-u64 CoreTiming::GetIdleTicks() const {
- return static_cast<u64>(idled_cycles);
+void CoreTiming::ResetTicks() {
+ downcount = MAX_SLICE_LENGTH;
}
-void CoreTiming::AddTicks(u64 ticks) {
- accumulated_ticks += ticks;
- downcounts[current_context] -= static_cast<s64>(ticks);
+// Current time in emulated CPU cycles: the internal tick counter in single core, the wall
+// clock's CPU-cycle reading in multicore.
+u64 CoreTiming::GetCPUTicks() const {
+ if (!is_multicore) {
+ return ticks;
+ }
+ return clock->GetCPUCycles();
+}
+
+// Current time in clock cycles: converted from the internal tick counter in single core, read
+// from the wall clock in multicore.
+u64 CoreTiming::GetClockTicks() const {
+ if (!is_multicore) {
+ return CpuCyclesToClockCycles(ticks);
+ }
+ return clock->GetClockCycles();
}
void CoreTiming::ClearPendingEvents() {
@@ -117,7 +172,7 @@ void CoreTiming::ClearPendingEvents() {
}
void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
- std::lock_guard guard{inner_mutex};
+ basic_lock.lock();
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
return e.type.lock().get() == event_type.get();
@@ -128,99 +183,72 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
event_queue.erase(itr, event_queue.end());
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
+ basic_lock.unlock();
}
-void CoreTiming::ForceExceptionCheck(s64 cycles) {
- cycles = std::max<s64>(0, cycles);
- if (downcounts[current_context] <= cycles) {
- return;
- }
-
- // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
- // here. Account for cycles already executed by adjusting the g.slice_length
- downcounts[current_context] = static_cast<int>(cycles);
-}
-
-std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
- const u64 original_context = current_context;
- u64 next_context = (original_context + 1) % num_cpu_cores;
- while (next_context != original_context) {
- if (time_slice[next_context] >= needed_ticks) {
- return {next_context};
- } else if (time_slice[next_context] >= 0) {
- return std::nullopt;
- }
- next_context = (next_context + 1) % num_cpu_cores;
- }
- return std::nullopt;
-}
-
-void CoreTiming::Advance() {
- std::unique_lock<std::mutex> guard(inner_mutex);
-
- const u64 cycles_executed = accumulated_ticks;
- time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks);
- global_timer += cycles_executed;
-
- is_global_timer_sane = true;
+std::optional<s64> CoreTiming::Advance() {
+ std::scoped_lock advance_scope{advance_lock};
+ std::scoped_lock basic_scope{basic_lock};
+ global_timer = GetGlobalTimeNs().count();
while (!event_queue.empty() && event_queue.front().time <= global_timer) {
Event evt = std::move(event_queue.front());
std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
event_queue.pop_back();
- inner_mutex.unlock();
+ basic_lock.unlock();
if (auto event_type{evt.type.lock()}) {
event_type->callback(evt.userdata, global_timer - evt.time);
}
- inner_mutex.lock();
+ basic_lock.lock();
+ global_timer = GetGlobalTimeNs().count();
}
- is_global_timer_sane = false;
-
- // Still events left (scheduled in the future)
if (!event_queue.empty()) {
- const s64 needed_ticks =
- std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
- const auto next_core = NextAvailableCore(needed_ticks);
- if (next_core) {
- downcounts[*next_core] = needed_ticks;
- }
+ const s64 next_time = event_queue.front().time - global_timer;
+ return next_time;
+ } else {
+ return std::nullopt;
}
-
- accumulated_ticks = 0;
-
- downcounts[current_context] = time_slice[current_context];
}
-void CoreTiming::ResetRun() {
- downcounts.fill(MAX_SLICE_LENGTH);
- time_slice.fill(MAX_SLICE_LENGTH);
- current_context = 0;
- // Still events left (scheduled in the future)
- if (!event_queue.empty()) {
- const s64 needed_ticks =
- std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
- downcounts[current_context] = needed_ticks;
+void CoreTiming::ThreadLoop() {
+ has_started = true;
+ while (!shutting_down) {
+ while (!paused) {
+ paused_set = false;
+ const auto next_time = Advance();
+ if (next_time) {
+ if (*next_time > 0) {
+ std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+ event.WaitFor(next_time_ns);
+ }
+ } else {
+ wait_set = true;
+ event.Wait();
+ }
+ wait_set = false;
+ }
+ paused_set = true;
+ clock->Pause(true);
+ pause_event.Wait();
+ clock->Pause(false);
}
-
- is_global_timer_sane = false;
- accumulated_ticks = 0;
}
-void CoreTiming::Idle() {
- accumulated_ticks += downcounts[current_context];
- idled_cycles += downcounts[current_context];
- downcounts[current_context] = 0;
+// Current global time in nanoseconds: derived from the internal tick counter in single core,
+// read from the wall clock in multicore.
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+ if (!is_multicore) {
+ return CyclesToNs(ticks);
+ }
+ return clock->GetTimeNS();
}
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
- return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
-}
-
-s64 CoreTiming::GetDowncount() const {
- return downcounts[current_context];
+ if (is_multicore) {
+ return clock->GetTimeUS();
+ }
+ return CyclesToUs(ticks);
}
} // namespace Core::Timing
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index d50f4eb8a..72faaab64 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -1,19 +1,25 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
+#include <atomic>
#include <chrono>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <string>
+#include <thread>
#include <vector>
#include "common/common_types.h"
+#include "common/spin_lock.h"
+#include "common/thread.h"
#include "common/threadsafe_queue.h"
+#include "common/wall_clock.h"
+#include "core/hardware_properties.h"
namespace Core::Timing {
@@ -56,16 +62,40 @@ public:
/// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
/// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
- void Initialize();
+ void Initialize(std::function<void(void)>&& on_thread_init_);
/// Tears down all timing related functionality.
void Shutdown();
- /// After the first Advance, the slice lengths and the downcount will be reduced whenever an
- /// event is scheduled earlier than the current values.
- ///
- /// Scheduling from a callback will not update the downcount until the Advance() completes.
- void ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
+ /// Sets whether emulation is multicore or single core; must be called before Initialize().
+ void SetMulticore(bool is_multicore) {
+ this->is_multicore = is_multicore;
+ }
+
+ /// Check if it's using host timing.
+ bool IsHostTiming() const {
+ return is_multicore;
+ }
+
+ /// Pauses/Unpauses the execution of the timer thread.
+ void Pause(bool is_paused);
+
+ /// Pauses/Unpauses the execution of the timer thread and waits until it reaches that state.
+ void SyncPause(bool is_paused);
+
+ /// Checks if core timing is running.
+ bool IsRunning() const;
+
+ /// Checks if the timer thread has started.
+ bool HasStarted() const {
+ return has_started;
+ }
+
+ /// Checks if there are any pending time events.
+ bool HasPendingEvents() const;
+
+ /// Schedules an event in core timing
+ void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
u64 userdata = 0);
void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
@@ -73,41 +103,30 @@ public:
/// We only permit one event of each type in the queue at a time.
void RemoveEvent(const std::shared_ptr<EventType>& event_type);
- void ForceExceptionCheck(s64 cycles);
-
- /// This should only be called from the emu thread, if you are calling it any other thread,
- /// you are doing something evil
- u64 GetTicks() const;
-
- u64 GetIdleTicks() const;
-
void AddTicks(u64 ticks);
- /// Advance must be called at the beginning of dispatcher loops, not the end. Advance() ends
- /// the previous timing slice and begins the next one, you must Advance from the previous
- /// slice to the current one before executing any cycles. CoreTiming starts in slice -1 so an
- /// Advance() is required to initialize the slice length before the first cycle of emulated
- /// instructions is executed.
- void Advance();
+ void ResetTicks();
- /// Pretend that the main CPU has executed enough cycles to reach the next event.
void Idle();
- std::chrono::microseconds GetGlobalTimeUs() const;
+ s64 GetDowncount() const {
+ return downcount;
+ }
- void ResetRun();
+ /// Returns current time in emulated CPU cycles
+ u64 GetCPUTicks() const;
- s64 GetDowncount() const;
+ /// Returns current time in emulated clock cycles
+ u64 GetClockTicks() const;
- void SwitchContext(u64 new_context) {
- current_context = new_context;
- }
+ /// Returns current time in microseconds.
+ std::chrono::microseconds GetGlobalTimeUs() const;
- bool CanCurrentContextRun() const {
- return time_slice[current_context] > 0;
- }
+ /// Returns current time in nanoseconds.
+ std::chrono::nanoseconds GetGlobalTimeNs() const;
- std::optional<u64> NextAvailableCore(const s64 needed_ticks) const;
+ /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
+ std::optional<s64> Advance();
private:
struct Event;
@@ -115,21 +134,14 @@ private:
/// Clear all pending events. This should ONLY be done on exit.
void ClearPendingEvents();
- static constexpr u64 num_cpu_cores = 4;
+ static void ThreadEntry(CoreTiming& instance);
+ void ThreadLoop();
- s64 global_timer = 0;
- s64 idled_cycles = 0;
- s64 slice_length = 0;
- u64 accumulated_ticks = 0;
- std::array<s64, num_cpu_cores> downcounts{};
- // Slice of time assigned to each core per run.
- std::array<s64, num_cpu_cores> time_slice{};
- u64 current_context = 0;
+ std::unique_ptr<Common::WallClock> clock;
- // Are we in a function that has been called from Advance()
- // If events are scheduled from a function that gets called from Advance(),
- // don't change slice_length and downcount.
- bool is_global_timer_sane = false;
+ u64 global_timer = 0;
+
+ std::chrono::nanoseconds start_point;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
// We don't use std::priority_queue because we need to be able to serialize, unserialize and
@@ -139,8 +151,23 @@ private:
u64 event_fifo_id = 0;
std::shared_ptr<EventType> ev_lost;
-
- std::mutex inner_mutex;
+ Common::Event event{};
+ Common::Event pause_event{};
+ Common::SpinLock basic_lock{};
+ Common::SpinLock advance_lock{};
+ std::unique_ptr<std::thread> timer_thread;
+ std::atomic<bool> paused{};
+ std::atomic<bool> paused_set{};
+ std::atomic<bool> wait_set{};
+ std::atomic<bool> shutting_down{};
+ std::atomic<bool> has_started{};
+ std::function<void(void)> on_thread_init{};
+
+ bool is_multicore{};
+
+ /// Cycle timing
+ u64 ticks{};
+ s64 downcount{};
};
/// Creates a core timing event with the given name and callback.
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index de50d3b14..aefc63663 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -38,15 +38,23 @@ s64 usToCycles(std::chrono::microseconds us) {
}
s64 nsToCycles(std::chrono::nanoseconds ns) {
- if (static_cast<u64>(ns.count() / 1000000000) > MAX_VALUE_TO_MULTIPLY) {
- LOG_ERROR(Core_Timing, "Integer overflow, use max value");
- return std::numeric_limits<s64>::max();
- }
- if (static_cast<u64>(ns.count()) > MAX_VALUE_TO_MULTIPLY) {
- LOG_DEBUG(Core_Timing, "Time very big, do rounding");
- return Hardware::BASE_CLOCK_RATE * (ns.count() / 1000000000);
- }
- return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
+ const u128 temporal = Common::Multiply64Into128(ns.count(), Hardware::BASE_CLOCK_RATE);
+ return Common::Divide128On32(temporal, static_cast<u32>(1000000000)).first;
+}
+
+// Converts a millisecond duration to CNTFREQ clock cycles: count * CNTFREQ / 1000, with the
+// product held in 128 bits. The parameter is named `ns` but carries milliseconds.
+u64 msToClockCycles(std::chrono::milliseconds ns) {
+ constexpr u32 ms_per_second = 1000;
+ const u128 scaled = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+ return Common::Divide128On32(scaled, ms_per_second).first;
+}
+
+// Converts a microsecond duration to CNTFREQ clock cycles: count * CNTFREQ / 1000000, with the
+// product held in 128 bits. The parameter is named `ns` but carries microseconds.
+u64 usToClockCycles(std::chrono::microseconds ns) {
+ constexpr u32 us_per_second = 1000000;
+ const u128 scaled = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+ return Common::Divide128On32(scaled, us_per_second).first;
+}
+
+// Converts a nanosecond duration to CNTFREQ clock cycles: count * CNTFREQ / 1000000000, with
+// the product held in 128 bits.
+u64 nsToClockCycles(std::chrono::nanoseconds ns) {
+ constexpr u32 ns_per_second = 1000000000;
+ const u128 scaled = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
+ return Common::Divide128On32(scaled, ns_per_second).first;
}
u64 CpuCyclesToClockCycles(u64 ticks) {
@@ -54,4 +62,22 @@ u64 CpuCyclesToClockCycles(u64 ticks) {
return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
}
+// Converts CPU cycles to milliseconds: cycles * 1000 / BASE_CLOCK_RATE, with the product held
+// in 128 bits.
+std::chrono::milliseconds CyclesToMs(s64 cycles) {
+ const auto clock_rate = static_cast<u32>(Hardware::BASE_CLOCK_RATE);
+ const u128 scaled = Common::Multiply64Into128(cycles, 1000);
+ const u64 result = Common::Divide128On32(scaled, clock_rate).first;
+ return std::chrono::milliseconds(result);
+}
+
+// Converts CPU cycles to nanoseconds: cycles * 1000000000 / BASE_CLOCK_RATE, with the product
+// held in 128 bits.
+std::chrono::nanoseconds CyclesToNs(s64 cycles) {
+ const auto clock_rate = static_cast<u32>(Hardware::BASE_CLOCK_RATE);
+ const u128 scaled = Common::Multiply64Into128(cycles, 1000000000);
+ const u64 result = Common::Divide128On32(scaled, clock_rate).first;
+ return std::chrono::nanoseconds(result);
+}
+
+// Converts CPU cycles to microseconds: cycles * 1000000 / BASE_CLOCK_RATE, with the product
+// held in 128 bits.
+std::chrono::microseconds CyclesToUs(s64 cycles) {
+ const auto clock_rate = static_cast<u32>(Hardware::BASE_CLOCK_RATE);
+ const u128 scaled = Common::Multiply64Into128(cycles, 1000000);
+ const u64 result = Common::Divide128On32(scaled, clock_rate).first;
+ return std::chrono::microseconds(result);
+}
+
} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index addc72b19..2ed979e14 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,18 +13,12 @@ namespace Core::Timing {
s64 msToCycles(std::chrono::milliseconds ms);
s64 usToCycles(std::chrono::microseconds us);
s64 nsToCycles(std::chrono::nanoseconds ns);
-
-inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
- return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::nanoseconds CyclesToNs(s64 cycles) {
- return std::chrono::nanoseconds(cycles * 1000000000 / Hardware::BASE_CLOCK_RATE);
-}
-
-inline std::chrono::microseconds CyclesToUs(s64 cycles) {
- return std::chrono::microseconds(cycles * 1000000 / Hardware::BASE_CLOCK_RATE);
-}
+u64 msToClockCycles(std::chrono::milliseconds ns);
+u64 usToClockCycles(std::chrono::microseconds ns);
+u64 nsToClockCycles(std::chrono::nanoseconds ns);
+std::chrono::milliseconds CyclesToMs(s64 cycles);
+std::chrono::nanoseconds CyclesToNs(s64 cycles);
+std::chrono::microseconds CyclesToUs(s64 cycles);
u64 CpuCyclesToClockCycles(u64 ticks);
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 70ddbdcca..32afcf3ae 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -2,80 +2,372 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/fiber.h"
+#include "common/microprofile.h"
+#include "common/thread.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/cpu_manager.h"
#include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "video_core/gpu.h"
namespace Core {
CpuManager::CpuManager(System& system) : system{system} {}
CpuManager::~CpuManager() = default;
+void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) {
+ cpu_manager.RunThread(core);
+}
+
void CpuManager::Initialize() {
- for (std::size_t index = 0; index < core_managers.size(); ++index) {
- core_managers[index] = std::make_unique<CoreManager>(system, index);
+ running_mode = true;
+ // Multicore starts one host thread per emulated core; single core starts only core 0.
+ const std::size_t thread_count = is_multicore ? Core::Hardware::NUM_CPU_CORES : 1;
+ for (std::size_t core = 0; core < thread_count; core++) {
+ core_data[core].host_thread =
+ std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
}
}
+/// Stops the host threads started by Initialize: clears running_mode, calls Pause(false), then
+/// joins and destroys each host thread (all cores in multicore mode, core_data[0] otherwise).
void CpuManager::Shutdown() {
- for (auto& cpu_core : core_managers) {
- cpu_core.reset();
+ // Cleared first so that RunThread's loop condition fails the next time it is evaluated.
+ running_mode = false;
+ Pause(false);
+ if (is_multicore) {
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ core_data[core].host_thread->join();
+ core_data[core].host_thread.reset();
+ }
+ } else {
+ core_data[0].host_thread->join();
+ core_data[0].host_thread.reset();
}
}
-CoreManager& CpuManager::GetCoreManager(std::size_t index) {
- return *core_managers.at(index);
+/// Returns GuestThreadFunction wrapped in a std::function.
+std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
+    return GuestThreadFunction;
}
-const CoreManager& CpuManager::GetCoreManager(std::size_t index) const {
- return *core_managers.at(index);
+/// Returns IdleThreadFunction wrapped in a std::function.
+std::function<void(void*)> CpuManager::GetIdleThreadStartFunc() {
+    return IdleThreadFunction;
}
-CoreManager& CpuManager::GetCurrentCoreManager() {
- // Otherwise, use single-threaded mode active_core variable
- return *core_managers[active_core];
+/// Returns SuspendThreadFunction wrapped in a std::function.
+std::function<void(void*)> CpuManager::GetSuspendThreadStartFunc() {
+    return SuspendThreadFunction;
}
-const CoreManager& CpuManager::GetCurrentCoreManager() const {
- // Otherwise, use single-threaded mode active_core variable
- return *core_managers[active_core];
+/// Start routine for guest threads. cpu_manager_ is the CpuManager passed as an opaque pointer;
+/// dispatches to the multicore or single-core guest-thread runner.
+void CpuManager::GuestThreadFunction(void* cpu_manager_) {
+    auto* const manager = static_cast<CpuManager*>(cpu_manager_);
+    if (!manager->is_multicore) {
+        manager->SingleCoreRunGuestThread();
+        return;
+    }
+    manager->MultiCoreRunGuestThread();
}
-void CpuManager::RunLoop(bool tight_loop) {
- if (GDBStub::IsServerEnabled()) {
- GDBStub::HandlePacket();
+/// Enters the guest run loop directly, without the thread-start step that GuestThreadFunction
+/// performs. cpu_manager_ is the CpuManager passed as an opaque pointer.
+void CpuManager::GuestRewindFunction(void* cpu_manager_) {
+    auto* const manager = static_cast<CpuManager*>(cpu_manager_);
+    if (!manager->is_multicore) {
+        manager->SingleCoreRunGuestLoop();
+        return;
+    }
+    manager->MultiCoreRunGuestLoop();
+}
- // If the loop is halted and we want to step, use a tiny (1) number of instructions to
- // execute. Otherwise, get out of the loop function.
- if (GDBStub::GetCpuHaltFlag()) {
- if (GDBStub::GetCpuStepFlag()) {
- tight_loop = false;
- } else {
- return;
+/// Start routine for idle threads. cpu_manager_ is the CpuManager passed as an opaque pointer;
+/// dispatches to the multicore or single-core idle loop.
+void CpuManager::IdleThreadFunction(void* cpu_manager_) {
+    auto* const manager = static_cast<CpuManager*>(cpu_manager_);
+    if (!manager->is_multicore) {
+        manager->SingleCoreRunIdleThread();
+        return;
+    }
+    manager->MultiCoreRunIdleThread();
+}
+
+/// Start routine for suspend threads. cpu_manager_ is the CpuManager passed as an opaque
+/// pointer; dispatches to the multicore or single-core suspend loop.
+void CpuManager::SuspendThreadFunction(void* cpu_manager_) {
+    auto* const manager = static_cast<CpuManager*>(cpu_manager_);
+    if (!manager->is_multicore) {
+        manager->SingleCoreRunSuspendThread();
+        return;
+    }
+    manager->MultiCoreRunSuspendThread();
+}
+
+/// Returns this CpuManager as the opaque void* argument that the *ThreadFunction start routines
+/// cast back to CpuManager*.
+void* CpuManager::GetStartFuncParamater() {
+    return this;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// MultiCore ///
+///////////////////////////////////////////////////////////////////////////////
+
+/// Notifies the current scheduler that the thread has started, then enters the multicore guest
+/// run loop.
+void CpuManager::MultiCoreRunGuestThread() {
+    system.Kernel().CurrentScheduler().OnThreadStart();
+    MultiCoreRunGuestLoop();
+}
+
+/// Runs the current guest thread's ARM interface until the current physical core reports an
+/// interrupt, then clears exclusive state and lets the scheduler try a context switch. Loops
+/// without returning.
+void CpuManager::MultiCoreRunGuestLoop() {
+    auto& kernel = system.Kernel();
+    auto* const guest_thread = kernel.CurrentScheduler().GetCurrentThread();
+    for (;;) {
+        auto* core = &kernel.CurrentPhysicalCore();
+        auto& cpu = guest_thread->ArmInterface();
+        system.EnterDynarmicProfile();
+        // The physical core is looked up again after every Run() before re-checking the interrupt.
+        for (; !core->IsInterrupted(); core = &kernel.CurrentPhysicalCore()) {
+            cpu.Run();
+        }
+        system.ExitDynarmicProfile();
+        cpu.ClearExclusiveState();
+        kernel.CurrentScheduler().TryDoContextSwitch();
+    }
+}
+
+/// Idle loop for multicore mode: idles the current physical core, then lets the current
+/// scheduler try a context switch. Loops without returning.
+void CpuManager::MultiCoreRunIdleThread() {
+    auto& kernel = system.Kernel();
+    for (;;) {
+        kernel.CurrentPhysicalCore().Idle();
+        kernel.CurrentScheduler().TryDoContextSwitch();
+    }
+}
+
+/// Suspend loop for multicore mode: yields from the current thread's fiber to the host fiber of
+/// the core it is running on, and once resumed lets the scheduler perform the pending context
+/// switch. Loops without returning.
+void CpuManager::MultiCoreRunSuspendThread() {
+    auto& kernel = system.Kernel();
+    kernel.CurrentScheduler().OnThreadStart();
+    for (;;) {
+        const auto host_core = kernel.GetCurrentHostThreadID();
+        auto& sched = kernel.CurrentScheduler();
+        Kernel::Thread* const suspended = sched.GetCurrentThread();
+        Common::Fiber::YieldTo(suspended->GetHostContext(), core_data[host_core].host_context);
+        // On resume a context switch must be pending and the host thread must be unchanged.
+        ASSERT(sched.ContextSwitchPending());
+        ASSERT(host_core == kernel.GetCurrentHostThreadID());
+        sched.TryDoContextSwitch();
+    }
+}
+
+/// Multicore implementation of Pause. Busy-waits on the per-core atomics in core_data and
+/// records the requested state in paused_state before returning.
+///  - paused == false: waits until every core is initialized and not running, sets every
+///    enter_barrier, and, if paused_state was previously true, waits for every core to report
+///    is_paused and then sets every exit_barrier.
+///  - paused == true: only waits until every core reports is_paused; no barrier is released.
+void CpuManager::MultiCorePause(bool paused) {
+ if (!paused) {
+ // Spin until every core reports initialized == true and is_running == false.
+ bool all_not_barrier = false;
+ while (!all_not_barrier) {
+ all_not_barrier = true;
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ all_not_barrier &=
+ !core_data[core].is_running.load() && core_data[core].initialized.load();
+ }
+ }
+ // Signal the event each host thread waits on at the top of its RunThread loop.
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ core_data[core].enter_barrier->Set();
+ }
+ // Only when the previous request was a pause are threads expected to be at exit_barrier.
+ if (paused_state.load()) {
+ bool all_barrier = false;
+ while (!all_barrier) {
+ all_barrier = true;
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ all_barrier &=
+ core_data[core].is_paused.load() && core_data[core].initialized.load();
+ }
+ }
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ core_data[core].exit_barrier->Set();
+ }
+ }
+ } else {
+ /// Wait until all cores are paused.
+ bool all_barrier = false;
+ while (!all_barrier) {
+ all_barrier = true;
+ for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ all_barrier &=
+ core_data[core].is_paused.load() && core_data[core].initialized.load();
}
}
+ /// Don't release the barrier
}
+ paused_state = paused;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// SingleCore ///
+///////////////////////////////////////////////////////////////////////////////
- auto& core_timing = system.CoreTiming();
- core_timing.ResetRun();
- bool keep_running{};
- do {
- keep_running = false;
- for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
- core_timing.SwitchContext(active_core);
- if (core_timing.CanCurrentContextRun()) {
- core_managers[active_core]->RunLoop(tight_loop);
+/// Notifies the current scheduler that the thread has started, then enters the single-core
+/// guest run loop.
+void CpuManager::SingleCoreRunGuestThread() {
+    system.Kernel().CurrentScheduler().OnThreadStart();
+    SingleCoreRunGuestLoop();
+}
+
+/// Single-core guest loop: runs the guest thread once per iteration unless the physical core is
+/// already interrupted, advances core timing with the thread in phantom mode, preempts to the
+/// next emulated core and lets that core's scheduler try a context switch. Loops without
+/// returning.
+void CpuManager::SingleCoreRunGuestLoop() {
+    auto& kernel = system.Kernel();
+    auto* const guest_thread = kernel.CurrentScheduler().GetCurrentThread();
+    for (;;) {
+        auto* core = &kernel.CurrentPhysicalCore();
+        auto& cpu = guest_thread->ArmInterface();
+        system.EnterDynarmicProfile();
+        if (!core->IsInterrupted()) {
+            cpu.Run();
+            // Re-queried after Run(); the result is not read again within this iteration.
+            core = &kernel.CurrentPhysicalCore();
+        }
+        system.ExitDynarmicProfile();
+
+        // Core timing is advanced while the thread is flagged as being in phantom mode.
+        guest_thread->SetPhantomMode(true);
+        system.CoreTiming().Advance();
+        guest_thread->SetPhantomMode(false);
+
+        cpu.ClearExclusiveState();
+        PreemptSingleCore();
+        kernel.Scheduler(current_core).TryDoContextSwitch();
+    }
+}
+
+/// Single-core idle loop: preempts to the next emulated core, adds 1000 ticks to core timing,
+/// counts the idle pass and lets the scheduler of the core captured before the preemption try a
+/// context switch. Loops without returning.
+void CpuManager::SingleCoreRunIdleThread() {
+    auto& kernel = system.Kernel();
+    for (;;) {
+        // Captured before PreemptSingleCore(); its scheduler is the one used below.
+        auto& idle_core = kernel.CurrentPhysicalCore();
+        PreemptSingleCore(false);
+        system.CoreTiming().AddTicks(1000U);
+        idle_count++;
+        idle_core.Scheduler().TryDoContextSwitch();
+    }
+}
+
+/// Suspend loop for single-core mode: yields from the current thread's fiber to the host fiber
+/// kept in core_data[0], and once resumed lets the scheduler perform the pending context switch.
+/// Loops without returning.
+void CpuManager::SingleCoreRunSuspendThread() {
+    auto& kernel = system.Kernel();
+    kernel.CurrentScheduler().OnThreadStart();
+    for (;;) {
+        const auto host_core = kernel.GetCurrentHostThreadID();
+        auto& sched = kernel.CurrentScheduler();
+        Kernel::Thread* const suspended = sched.GetCurrentThread();
+        Common::Fiber::YieldTo(suspended->GetHostContext(), core_data[0].host_context);
+        // On resume a context switch must be pending and the host thread must be unchanged.
+        ASSERT(sched.ContextSwitchPending());
+        ASSERT(host_core == kernel.GetCurrentHostThreadID());
+        sched.TryDoContextSwitch();
+    }
+}
+
+/// Single-core preemption: advances current_core to the next emulated core and yields from the
+/// current thread's fiber to that core's scheduler control context.
+/// When from_running_enviroment is true, core timing is advanced first. When it is false, that
+/// only happens once idle_count has reached 4, in which case core timing is also idled and
+/// idle_count reset.
+void CpuManager::PreemptSingleCore(bool from_running_enviroment) {
+    const std::size_t previous_core = current_core;
+    auto& outgoing_scheduler = system.Kernel().Scheduler(previous_core);
+    Kernel::Thread* const outgoing_thread = outgoing_scheduler.GetCurrentThread();
+
+    if (idle_count >= 4 || from_running_enviroment) {
+        if (!from_running_enviroment) {
+            system.CoreTiming().Idle();
+            idle_count = 0;
+        }
+        outgoing_thread->SetPhantomMode(true);
+        system.CoreTiming().Advance();
+        outgoing_thread->SetPhantomMode(false);
+    }
+
+    current_core.store((current_core + 1) % Core::Hardware::NUM_CPU_CORES);
+    system.CoreTiming().ResetTicks();
+    outgoing_scheduler.Unload();
+
+    auto& incoming_scheduler = system.Kernel().Scheduler(current_core);
+    Common::Fiber::YieldTo(outgoing_thread->GetHostContext(), incoming_scheduler.ControlContext());
+
+    /// May have changed scheduler
+    auto& resumed_scheduler = system.Kernel().Scheduler(current_core);
+    resumed_scheduler.Reload();
+    if (!resumed_scheduler.GetCurrentThread()->IsIdleThread()) {
+        idle_count = 0;
+    }
+}
+
+/// Single-core implementation of Pause; same protocol as MultiCorePause but only core_data[0]
+/// is consulted. Busy-waits on that slot's atomics and records the requested state in
+/// paused_state before returning.
+void CpuManager::SingleCorePause(bool paused) {
+ if (!paused) {
+ // Spin until the host thread is initialized and not running.
+ bool all_not_barrier = false;
+ while (!all_not_barrier) {
+ all_not_barrier = !core_data[0].is_running.load() && core_data[0].initialized.load();
+ }
+ core_data[0].enter_barrier->Set();
+ // Only when the previous request was a pause is the thread expected to be at exit_barrier.
+ if (paused_state.load()) {
+ bool all_barrier = false;
+ while (!all_barrier) {
+ all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
}
- keep_running |= core_timing.CanCurrentContextRun();
+ core_data[0].exit_barrier->Set();
}
- } while (keep_running);
+ } else {
+ /// Wait until all cores are paused.
+ bool all_barrier = false;
+ while (!all_barrier) {
+ all_barrier = core_data[0].is_paused.load() && core_data[0].initialized.load();
+ }
+ /// Don't release the barrier
+ }
+ paused_state = paused;
+}
+
+/// Pauses or resumes emulation by delegating to the implementation that matches the configured
+/// mode (multicore or single-core).
+void CpuManager::Pause(bool paused) {
+    if (!is_multicore) {
+        SingleCorePause(paused);
+        return;
+    }
+    MultiCorePause(paused);
+}
- if (GDBStub::IsServerEnabled()) {
- GDBStub::SetCpuStepFlag(false);
+/// Body of a core's host thread. Registers the thread, names it, creates the core's barriers
+/// and host fiber, then repeatedly waits on enter_barrier, yields into the current guest
+/// thread's fiber and, once control returns, waits on exit_barrier. The loop ends when
+/// running_mode is false; the per-core state is then torn down.
+void CpuManager::RunThread(std::size_t core) {
+ /// Initialization
+ system.RegisterCoreThread(core);
+ std::string name;
+ if (is_multicore) {
+ name = "yuzu:CoreCPUThread_" + std::to_string(core);
+ } else {
+ name = "yuzu:CPUThread";
+ }
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ auto& data = core_data[core];
+ data.enter_barrier = std::make_unique<Common::Event>();
+ data.exit_barrier = std::make_unique<Common::Event>();
+ data.host_context = Common::Fiber::ThreadToFiber();
+ data.is_running = false;
+ // Set last: the Pause implementations check this flag before touching the barriers.
+ data.initialized = true;
+ // Synchronous GPU in single-core mode: this thread obtains the GPU context itself, once.
+ const bool sc_sync = !is_async_gpu && !is_multicore;
+ bool sc_sync_first_use = sc_sync;
+ /// Running
+ while (running_mode) {
+ data.is_running = false;
+ data.enter_barrier->Wait();
+ if (sc_sync_first_use) {
+ system.GPU().ObtainContext();
+ sc_sync_first_use = false;
+ }
+ auto& scheduler = system.Kernel().CurrentScheduler();
+ Kernel::Thread* current_thread = scheduler.GetCurrentThread();
+ data.is_running = true;
+ // Switch from the host fiber into the guest thread; execution resumes below when something
+ // yields back to data.host_context.
+ Common::Fiber::YieldTo(data.host_context, current_thread->GetHostContext());
+ data.is_running = false;
+ data.is_paused = true;
+ data.exit_barrier->Wait();
+ data.is_paused = false;
}
+ /// Time to cleanup
+ data.host_context->Exit();
+ data.enter_barrier.reset();
+ data.exit_barrier.reset();
+ data.initialized = false;
}
} // namespace Core
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 97554d1bb..35929ed94 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -5,12 +5,19 @@
#pragma once
#include <array>
+#include <atomic>
+#include <functional>
#include <memory>
+#include <thread>
#include "core/hardware_properties.h"
+namespace Common {
+class Event;
+class Fiber;
+} // namespace Common
+
namespace Core {
-class CoreManager;
class System;
class CpuManager {
@@ -24,24 +31,75 @@ public:
CpuManager& operator=(const CpuManager&) = delete;
CpuManager& operator=(CpuManager&&) = delete;
+ /// Selects multicore (true) or single-core (false) emulation. Must be called before
+ /// Initialize.
+ void SetMulticore(bool is_multicore) {
+ this->is_multicore = is_multicore;
+ }
+
+ /// Records whether emulation is using an asynchronous GPU.
+ void SetAsyncGpu(bool is_async_gpu) {
+ this->is_async_gpu = is_async_gpu;
+ }
+
void Initialize();
void Shutdown();
- CoreManager& GetCoreManager(std::size_t index);
- const CoreManager& GetCoreManager(std::size_t index) const;
+ void Pause(bool paused);
- CoreManager& GetCurrentCoreManager();
- const CoreManager& GetCurrentCoreManager() const;
+ std::function<void(void*)> GetGuestThreadStartFunc();
+ std::function<void(void*)> GetIdleThreadStartFunc();
+ std::function<void(void*)> GetSuspendThreadStartFunc();
+ void* GetStartFuncParamater();
- std::size_t GetActiveCoreIndex() const {
- return active_core;
- }
+ void PreemptSingleCore(bool from_running_enviroment = true);
- void RunLoop(bool tight_loop);
+ /// Returns the current value of the atomic current_core index.
+ std::size_t CurrentCore() const {
+     return current_core;
+ }
private:
- std::array<std::unique_ptr<CoreManager>, Hardware::NUM_CPU_CORES> core_managers;
- std::size_t active_core{}; ///< Active core, only used in single thread mode
+ static void GuestThreadFunction(void* cpu_manager);
+ static void GuestRewindFunction(void* cpu_manager);
+ static void IdleThreadFunction(void* cpu_manager);
+ static void SuspendThreadFunction(void* cpu_manager);
+
+ void MultiCoreRunGuestThread();
+ void MultiCoreRunGuestLoop();
+ void MultiCoreRunIdleThread();
+ void MultiCoreRunSuspendThread();
+ void MultiCorePause(bool paused);
+
+ void SingleCoreRunGuestThread();
+ void SingleCoreRunGuestLoop();
+ void SingleCoreRunIdleThread();
+ void SingleCoreRunSuspendThread();
+ void SingleCorePause(bool paused);
+
+ static void ThreadStart(CpuManager& cpu_manager, std::size_t core);
+
+ void RunThread(std::size_t core);
+
+ /// Per-core host-side state: the host thread, its fiber, the two events used to park it, and
+ /// the status flags that the pause logic polls.
+ struct CoreData {
+     std::shared_ptr<Common::Fiber> host_context;
+     std::unique_ptr<Common::Event> enter_barrier;
+     std::unique_ptr<Common::Event> exit_barrier;
+     // Explicitly value-initialized, like the other atomics in this class, so the flags start
+     // as false regardless of how the enclosing object is initialized.
+     std::atomic<bool> is_running{};
+     std::atomic<bool> is_paused{};
+     std::atomic<bool> initialized{};
+     std::unique_ptr<std::thread> host_thread;
+ };
+
+ /// True between Initialize and Shutdown; the host-thread loop runs while it is set.
+ std::atomic<bool> running_mode{};
+ /// Last value passed to Pause.
+ std::atomic<bool> paused_state{};
+
+ /// One slot per emulated core; single-core mode only uses slot 0 for the host thread.
+ std::array<CoreData, Core::Hardware::NUM_CPU_CORES> core_data{};
+
+ bool is_async_gpu{};
+ bool is_multicore{};
+ /// Index of the emulated core currently selected in single-core mode.
+ std::atomic<std::size_t> current_core{};
+ std::size_t preemption_count{};
+ /// Idle-loop passes counted since idle_count was last reset (single-core mode).
+ std::size_t idle_count{};
+ static constexpr std::size_t max_cycle_runs = 5;
System& system;
};
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index f155a1341..63cd2eead 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -95,6 +95,10 @@ u32 NACP::GetSupportedLanguages() const {
return raw.supported_languages;
}
+// Returns the device_save_data_size field of the raw NACP data.
+u64 NACP::GetDeviceSaveDataSize() const {
+ return raw.device_save_data_size;
+}
+
std::vector<u8> NACP::GetRawBytes() const {
std::vector<u8> out(sizeof(RawNACP));
std::memcpy(out.data(), &raw, sizeof(RawNACP));
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 2d8c251ac..e37b2fadf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -113,6 +113,7 @@ public:
u32 GetSupportedLanguages() const;
std::vector<u8> GetRawBytes() const;
bool GetUserAccountSwitchLock() const;
+ u64 GetDeviceSaveDataSize() const;
private:
RawNACP raw{};
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index b93aa6935..c47ff863e 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
#include "common/file_util.h"
#include "common/hex_util.h"
#include "common/logging/log.h"
+#include "common/string_util.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/control_metadata.h"
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
}
+// Looks up the direct subdirectory of dir whose name equals name, ignoring case.
+// On Windows the lookup is delegated unchanged to VfsDirectory::GetSubdirectory. Elsewhere
+// every subdirectory of dir is scanned and its lower-cased name is compared with the
+// lower-cased name, so the match does not depend on the caller passing an all-lowercase name.
+// Returns nullptr when no subdirectory matches. dir must not be null.
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+                                                       std::string_view name) {
+#ifdef _WIN32
+    return dir->GetSubdirectory(name);
+#else
+    // Fold the requested name once, outside the loop, so both sides are compared in lower case.
+    const std::string wanted = Common::ToLower(std::string(name));
+    const auto subdirs = dir->GetSubdirectories();
+    for (const auto& subdir : subdirs) {
+        if (Common::ToLower(subdir->GetName()) == wanted) {
+            return subdir;
+        }
+    }
+
+    return nullptr;
+#endif
+}
+
PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
PatchManager::~PatchManager() = default;
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
continue;
- auto exefs_dir = subdir->GetSubdirectory("exefs");
+ auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
if (exefs_dir != nullptr)
layers.push_back(std::move(exefs_dir));
}
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
continue;
- auto exefs_dir = subdir->GetSubdirectory("exefs");
+ auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
if (exefs_dir != nullptr) {
for (const auto& file : exefs_dir->GetFiles()) {
if (file->GetExtension() == "ips") {
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
continue;
}
- auto cheats_dir = subdir->GetSubdirectory("cheats");
+ auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
if (cheats_dir != nullptr) {
auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
if (res.has_value()) {
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
continue;
}
- auto romfs_dir = subdir->GetSubdirectory("romfs");
+ auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
if (romfs_dir != nullptr)
layers.push_back(std::move(romfs_dir));
- auto ext_dir = subdir->GetSubdirectory("romfs_ext");
+ auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
if (ext_dir != nullptr)
layers_ext.push_back(std::move(ext_dir));
}
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
for (const auto& mod : mod_dir->GetSubdirectories()) {
std::string types;
- const auto exefs_dir = mod->GetSubdirectory("exefs");
+ const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
if (IsDirValidAndNonEmpty(exefs_dir)) {
bool ips = false;
bool ipswitch = false;
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
if (layeredfs)
AppendCommaIfNotEmpty(types, "LayeredExeFS");
}
- if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
+ if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
AppendCommaIfNotEmpty(types, "LayeredFS");
- if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
+ if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
AppendCommaIfNotEmpty(types, "Cheats");
if (types.empty())
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index ec6db524d..f4cb918dd 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
std::string FormatTitleVersion(u32 version,
TitleVersionFormat format = TitleVersionFormat::ThreeElements);
+// Returns the subdirectory of dir whose name matches name case-insensitively, or nullptr if dir
+// has no such subdirectory.
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+ std::string_view name);
+
// A centralized class to manage patches to games.
class PatchManager {
public:
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index f3def93ab..adfd2c1a4 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -57,7 +57,8 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) {
bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) {
return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage ||
(space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User
- desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0);
+ (desc.type == SaveDataType::SaveData || desc.type == SaveDataType::DeviceSaveData) &&
+ desc.title_id == 0 && desc.save_id == 0);
}
} // Anonymous namespace
@@ -139,8 +140,10 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
u128 user_id, u64 save_id) {
// According to switchbrew, if a save is of type SaveData and the title id field is 0, it should
// be interpreted as the title id of the current process.
- if (type == SaveDataType::SaveData && title_id == 0) {
- title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
+ if (type == SaveDataType::SaveData || type == SaveDataType::DeviceSaveData) {
+ if (title_id == 0) {
+ title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
+ }
}
std::string out = GetSaveDataSpaceIdPath(space);
diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp
index 6a9add87c..61bb67945 100644
--- a/src/core/file_sys/system_archive/mii_model.cpp
+++ b/src/core/file_sys/system_archive/mii_model.cpp
@@ -40,7 +40,7 @@ VirtualDir MiiModel() {
out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
MiiModelData::SHAPE_MID, "ShapeMid.dat"));
- return std::move(out);
+ return out;
}
} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/shared_font.cpp b/src/core/file_sys/system_archive/shared_font.cpp
index 2c05eb42e..c5cdf7d9b 100644
--- a/src/core/file_sys/system_archive/shared_font.cpp
+++ b/src/core/file_sys/system_archive/shared_font.cpp
@@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name)
std::vector<u8> bfttf(Size + sizeof(u64));
- u64 offset = 0;
+ size_t offset = 0;
Service::NS::EncryptSharedFont(vec, bfttf, offset);
return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
}
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index 6e22f97b0..aa313de66 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -12,17 +12,17 @@ namespace SystemVersionData {
// This section should reflect the best system version to describe yuzu's HLE api.
// TODO(DarkLordZach): Update when HLE gets better.
-constexpr u8 VERSION_MAJOR = 5;
-constexpr u8 VERSION_MINOR = 1;
-constexpr u8 VERSION_MICRO = 0;
+constexpr u8 VERSION_MAJOR = 10;
+constexpr u8 VERSION_MINOR = 0;
+constexpr u8 VERSION_MICRO = 2;
-constexpr u8 REVISION_MAJOR = 3;
+constexpr u8 REVISION_MAJOR = 1;
constexpr u8 REVISION_MINOR = 0;
constexpr char PLATFORM_STRING[] = "NX";
-constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd";
-constexpr char DISPLAY_VERSION[] = "5.1.0";
-constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0";
+constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc";
+constexpr char DISPLAY_VERSION[] = "10.0.2";
+constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0";
} // namespace SystemVersionData
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index eda466a5d..9a081fbd4 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -46,7 +46,7 @@ private:
EmuWindow::EmuWindow() {
// TODO: Find a better place to set this.
config.min_client_area_size =
- std::make_pair(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
+ std::make_pair(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
active_config = config;
touch_state = std::make_shared<TouchState>();
Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state);
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index 15ecfb13d..91ecc30ab 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -8,6 +8,11 @@
namespace Layout {
+// Minimum width and height constants for the layout.
+namespace MinimumSize {
+constexpr u32 Width = 640;
+constexpr u32 Height = 360;
+} // namespace MinimumSize
+
namespace ScreenUndocked {
constexpr u32 Width = 1280;
constexpr u32 Height = 720;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 70c0f8b80..79f22a403 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -35,7 +35,6 @@
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/process.h"
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index b04e046ed..456b41e1b 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -42,6 +42,10 @@ struct EmuThreadHandle {
constexpr u32 invalid_handle = 0xFFFFFFFF;
return {invalid_handle, invalid_handle};
}
+
+ /// True when this handle compares equal to InvalidHandle().
+ bool IsInvalid() const {
+     return *this == InvalidHandle();
+ }
};
} // namespace Core
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index 8475b698c..4d2a9b35d 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -7,11 +7,15 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
#include "core/hle/result.h"
#include "core/memory.h"
@@ -20,6 +24,7 @@ namespace Kernel {
// Wake up num_to_wake (or all) threads in a vector.
void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads,
s32 num_to_wake) {
+ auto& time_manager = system.Kernel().TimeManager();
// Only process up to 'target' threads, unless 'target' is <= 0, in which case process
// them all.
std::size_t last = waiting_threads.size();
@@ -29,12 +34,10 @@ void AddressArbiter::WakeThreads(const std::vector<std::shared_ptr<Thread>>& wai
// Signal the waiting threads.
for (std::size_t i = 0; i < last; i++) {
- ASSERT(waiting_threads[i]->GetStatus() == ThreadStatus::WaitArb);
- waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+ waiting_threads[i]->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
RemoveThread(waiting_threads[i]);
- waiting_threads[i]->SetArbiterWaitAddress(0);
+ waiting_threads[i]->WaitForArbitration(false);
waiting_threads[i]->ResumeFromWait();
- system.PrepareReschedule(waiting_threads[i]->GetProcessorID());
}
}
@@ -56,6 +59,7 @@ ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 v
}
ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
+ SchedulerLock lock(system.Kernel());
const std::vector<std::shared_ptr<Thread>> waiting_threads =
GetThreadsWaitingOnAddress(address);
WakeThreads(waiting_threads, num_to_wake);
@@ -64,6 +68,7 @@ ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
+ SchedulerLock lock(system.Kernel());
auto& memory = system.Memory();
// Ensure that we can write to the address.
@@ -71,16 +76,24 @@ ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32
return ERR_INVALID_ADDRESS_STATE;
}
- if (static_cast<s32>(memory.Read32(address)) != value) {
- return ERR_INVALID_STATE;
- }
+ const std::size_t current_core = system.CurrentCoreIndex();
+ auto& monitor = system.Monitor();
+ u32 current_value;
+ do {
+ current_value = monitor.ExclusiveRead32(current_core, address);
+
+ if (current_value != value) {
+ return ERR_INVALID_STATE;
+ }
+ current_value++;
+ } while (!monitor.ExclusiveWrite32(current_core, address, current_value));
- memory.Write32(address, static_cast<u32>(value + 1));
return SignalToAddressOnly(address, num_to_wake);
}
ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
s32 num_to_wake) {
+ SchedulerLock lock(system.Kernel());
auto& memory = system.Memory();
// Ensure that we can write to the address.
@@ -92,29 +105,33 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a
const std::vector<std::shared_ptr<Thread>> waiting_threads =
GetThreadsWaitingOnAddress(address);
- // Determine the modified value depending on the waiting count.
+ const std::size_t current_core = system.CurrentCoreIndex();
+ auto& monitor = system.Monitor();
s32 updated_value;
- if (num_to_wake <= 0) {
- if (waiting_threads.empty()) {
- updated_value = value + 1;
- } else {
- updated_value = value - 1;
+ do {
+ updated_value = monitor.ExclusiveRead32(current_core, address);
+
+ if (updated_value != value) {
+ return ERR_INVALID_STATE;
}
- } else {
- if (waiting_threads.empty()) {
- updated_value = value + 1;
- } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
- updated_value = value - 1;
+ // Determine the modified value depending on the waiting count.
+ if (num_to_wake <= 0) {
+ if (waiting_threads.empty()) {
+ updated_value = value + 1;
+ } else {
+ updated_value = value - 1;
+ }
} else {
- updated_value = value;
+ if (waiting_threads.empty()) {
+ updated_value = value + 1;
+ } else if (waiting_threads.size() <= static_cast<u32>(num_to_wake)) {
+ updated_value = value - 1;
+ } else {
+ updated_value = value;
+ }
}
- }
+ } while (!monitor.ExclusiveWrite32(current_core, address, updated_value));
- if (static_cast<s32>(memory.Read32(address)) != value) {
- return ERR_INVALID_STATE;
- }
-
- memory.Write32(address, static_cast<u32>(updated_value));
WakeThreads(waiting_threads, num_to_wake);
return RESULT_SUCCESS;
}
@@ -136,60 +153,127 @@ ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s
ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
bool should_decrement) {
auto& memory = system.Memory();
+ auto& kernel = system.Kernel();
+ Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
- // Ensure that we can read the address.
- if (!memory.IsValidVirtualAddress(address)) {
- return ERR_INVALID_ADDRESS_STATE;
- }
+ Handle event_handle = InvalidHandle;
+ {
+ SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
+
+ if (current_thread->IsPendingTermination()) {
+ lock.CancelSleep();
+ return ERR_THREAD_TERMINATING;
+ }
+
+ // Ensure that we can read the address.
+ if (!memory.IsValidVirtualAddress(address)) {
+ lock.CancelSleep();
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ s32 current_value = static_cast<s32>(memory.Read32(address));
+ if (current_value >= value) {
+ lock.CancelSleep();
+ return ERR_INVALID_STATE;
+ }
+
+ current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+
+ s32 decrement_value;
+
+ const std::size_t current_core = system.CurrentCoreIndex();
+ auto& monitor = system.Monitor();
+ do {
+ current_value = static_cast<s32>(monitor.ExclusiveRead32(current_core, address));
+ if (should_decrement) {
+ decrement_value = current_value - 1;
+ } else {
+ decrement_value = current_value;
+ }
+ } while (
+ !monitor.ExclusiveWrite32(current_core, address, static_cast<u32>(decrement_value)));
+
+ // Short-circuit without rescheduling, if timeout is zero.
+ if (timeout == 0) {
+ lock.CancelSleep();
+ return RESULT_TIMEOUT;
+ }
- const s32 cur_value = static_cast<s32>(memory.Read32(address));
- if (cur_value >= value) {
- return ERR_INVALID_STATE;
+ current_thread->SetArbiterWaitAddress(address);
+ InsertThread(SharedFrom(current_thread));
+ current_thread->SetStatus(ThreadStatus::WaitArb);
+ current_thread->WaitForArbitration(true);
}
- if (should_decrement) {
- memory.Write32(address, static_cast<u32>(cur_value - 1));
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = kernel.TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
}
- // Short-circuit without rescheduling, if timeout is zero.
- if (timeout == 0) {
- return RESULT_TIMEOUT;
+ {
+ SchedulerLock lock(kernel);
+ if (current_thread->IsWaitingForArbitration()) {
+ RemoveThread(SharedFrom(current_thread));
+ current_thread->WaitForArbitration(false);
+ }
}
- return WaitForAddressImpl(address, timeout);
+ return current_thread->GetSignalingResult();
}
ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
auto& memory = system.Memory();
+ auto& kernel = system.Kernel();
+ Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
- // Ensure that we can read the address.
- if (!memory.IsValidVirtualAddress(address)) {
- return ERR_INVALID_ADDRESS_STATE;
- }
+ Handle event_handle = InvalidHandle;
+ {
+ SchedulerLockAndSleep lock(kernel, event_handle, current_thread, timeout);
+
+ if (current_thread->IsPendingTermination()) {
+ lock.CancelSleep();
+ return ERR_THREAD_TERMINATING;
+ }
+
+ // Ensure that we can read the address.
+ if (!memory.IsValidVirtualAddress(address)) {
+ lock.CancelSleep();
+ return ERR_INVALID_ADDRESS_STATE;
+ }
- // Only wait for the address if equal.
- if (static_cast<s32>(memory.Read32(address)) != value) {
- return ERR_INVALID_STATE;
+ s32 current_value = static_cast<s32>(memory.Read32(address));
+ if (current_value != value) {
+ lock.CancelSleep();
+ return ERR_INVALID_STATE;
+ }
+
+ // Short-circuit without rescheduling, if timeout is zero.
+ if (timeout == 0) {
+ lock.CancelSleep();
+ return RESULT_TIMEOUT;
+ }
+
+ current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+ current_thread->SetArbiterWaitAddress(address);
+ InsertThread(SharedFrom(current_thread));
+ current_thread->SetStatus(ThreadStatus::WaitArb);
+ current_thread->WaitForArbitration(true);
}
- // Short-circuit without rescheduling if timeout is zero.
- if (timeout == 0) {
- return RESULT_TIMEOUT;
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = kernel.TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
}
- return WaitForAddressImpl(address, timeout);
-}
+ {
+ SchedulerLock lock(kernel);
+ if (current_thread->IsWaitingForArbitration()) {
+ RemoveThread(SharedFrom(current_thread));
+ current_thread->WaitForArbitration(false);
+ }
+ }
-ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
- Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
- current_thread->SetArbiterWaitAddress(address);
- InsertThread(SharedFrom(current_thread));
- current_thread->SetStatus(ThreadStatus::WaitArb);
- current_thread->InvalidateWakeupCallback();
- current_thread->WakeAfterDelay(timeout);
-
- system.PrepareReschedule(current_thread->GetProcessorID());
- return RESULT_TIMEOUT;
+ return current_thread->GetSignalingResult();
}
void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) {
@@ -221,9 +305,9 @@ void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) {
const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(),
[&thread](const auto& entry) { return thread == entry; });
- ASSERT(iter != thread_list.cend());
-
- thread_list.erase(iter);
+ if (iter != thread_list.cend()) {
+ thread_list.erase(iter);
+ }
}
std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index f958eee5a..0b05d533c 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -73,9 +73,6 @@ private:
/// Waits on an address if the value passed is equal to the argument value.
ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
- // Waits on the given address with a timeout in nanoseconds
- ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
-
/// Wake up num_to_wake (or all) threads in a vector.
void WakeThreads(const std::vector<std::shared_ptr<Thread>>& waiting_threads, s32 num_to_wake);
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index 5498fd313..8aff2227a 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -34,7 +34,7 @@ ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
}
// Wake the threads waiting on the ServerPort
- server_port->WakeupAllWaitingThreads();
+ server_port->Signal();
return MakeResult(std::move(client));
}
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index 29bfa3621..d4e5d88cf 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -12,6 +12,7 @@ namespace Kernel {
constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
+constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 91d94025c..9277b5d08 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -13,16 +13,18 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/logging/log.h"
-#include "core/core.h"
#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/readable_event.h"
+#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
#include "core/hle/kernel/writable_event.h"
#include "core/memory.h"
@@ -47,31 +49,33 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
const std::string& reason, u64 timeout, WakeupCallback&& callback,
std::shared_ptr<WritableEvent> writable_event) {
// Put the client thread to sleep until the wait event is signaled or the timeout expires.
- thread->SetWakeupCallback(
- [context = *this, callback](ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<SynchronizationObject> object,
- std::size_t index) mutable -> bool {
- ASSERT(thread->GetStatus() == ThreadStatus::WaitHLEEvent);
- callback(thread, context, reason);
- context.WriteToOutgoingCommandBuffer(*thread);
- return true;
- });
-
- auto& kernel = Core::System::GetInstance().Kernel();
+
if (!writable_event) {
// Create event if not provided
const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
writable_event = pair.writable;
}
- const auto readable_event{writable_event->GetReadableEvent()};
- writable_event->Clear();
- thread->SetStatus(ThreadStatus::WaitHLEEvent);
- thread->SetSynchronizationObjects({readable_event});
- readable_event->AddWaitingThread(thread);
-
- if (timeout > 0) {
- thread->WakeAfterDelay(timeout);
+ {
+ Handle event_handle = InvalidHandle;
+ SchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
+ thread->SetHLECallback(
+ [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
+ ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
+ ? ThreadWakeupReason::Timeout
+ : ThreadWakeupReason::Signal;
+ callback(thread, context, reason);
+ context.WriteToOutgoingCommandBuffer(*thread);
+ return true;
+ });
+ const auto readable_event{writable_event->GetReadableEvent()};
+ writable_event->Clear();
+ thread->SetHLESyncObject(readable_event.get());
+ thread->SetStatus(ThreadStatus::WaitHLEEvent);
+ thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+ readable_event->AddWaitingThread(thread);
+ lock.Release();
+ thread->SetHLETimeEvent(event_handle);
}
is_thread_waiting = true;
@@ -79,9 +83,11 @@ std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
return writable_event;
}
-HLERequestContext::HLERequestContext(std::shared_ptr<Kernel::ServerSession> server_session,
+HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
+ std::shared_ptr<ServerSession> server_session,
std::shared_ptr<Thread> thread)
- : server_session(std::move(server_session)), thread(std::move(thread)) {
+ : server_session(std::move(server_session)),
+ thread(std::move(thread)), kernel{kernel}, memory{memory} {
cmd_buf[0] = 0;
}
@@ -216,7 +222,6 @@ ResultCode HLERequestContext::PopulateFromIncomingCommandBuffer(const HandleTabl
ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
auto& owner_process = *thread.GetOwnerProcess();
auto& handle_table = owner_process.GetHandleTable();
- auto& memory = Core::System::GetInstance().Memory();
std::array<u32, IPC::COMMAND_BUFFER_LENGTH> dst_cmdbuf;
memory.ReadBlock(owner_process, thread.GetTLSAddress(), dst_cmdbuf.data(),
@@ -283,19 +288,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
}
std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
- std::vector<u8> buffer;
+ std::vector<u8> buffer{};
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
- auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_a) {
- ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
- "BufferDescriptorA invalid buffer_index {}", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return buffer; },
+ "BufferDescriptorA invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorA()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
} else {
- ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
- "BufferDescriptorX invalid buffer_index {}", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return buffer; },
+ "BufferDescriptorX invalid buffer_index {}", buffer_index);
buffer.resize(BufferDescriptorX()[buffer_index].Size());
memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
}
@@ -319,18 +323,17 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
size = buffer_size; // TODO(bunnei): This needs to be HW tested
}
- auto& memory = Core::System::GetInstance().Memory();
if (is_buffer_b) {
- ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
- "BufferDescriptorB invalid buffer_index {}", buffer_index);
- ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size,
- "BufferDescriptorB buffer_index {} is not large enough", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index &&
+ BufferDescriptorB()[buffer_index].Size() >= size,
+ { return 0; }, "BufferDescriptorB is invalid, index={}, size={}",
+ buffer_index, size);
memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
} else {
- ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
- "BufferDescriptorC invalid buffer_index {}", buffer_index);
- ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size,
- "BufferDescriptorC buffer_index {} is not large enough", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index &&
+ BufferDescriptorC()[buffer_index].Size() >= size,
+ { return 0; }, "BufferDescriptorC is invalid, index={}, size={}",
+ buffer_index, size);
memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
}
@@ -341,16 +344,12 @@ std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()};
if (is_buffer_a) {
- ASSERT_MSG(BufferDescriptorA().size() > buffer_index,
- "BufferDescriptorA invalid buffer_index {}", buffer_index);
- ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
- "BufferDescriptorA buffer_index {} is empty", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return 0; },
+ "BufferDescriptorA invalid buffer_index {}", buffer_index);
return BufferDescriptorA()[buffer_index].Size();
} else {
- ASSERT_MSG(BufferDescriptorX().size() > buffer_index,
- "BufferDescriptorX invalid buffer_index {}", buffer_index);
- ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
- "BufferDescriptorX buffer_index {} is empty", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return 0; },
+ "BufferDescriptorX invalid buffer_index {}", buffer_index);
return BufferDescriptorX()[buffer_index].Size();
}
}
@@ -359,14 +358,15 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
BufferDescriptorB()[buffer_index].Size()};
if (is_buffer_b) {
- ASSERT_MSG(BufferDescriptorB().size() > buffer_index,
- "BufferDescriptorB invalid buffer_index {}", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index, { return 0; },
+ "BufferDescriptorB invalid buffer_index {}", buffer_index);
return BufferDescriptorB()[buffer_index].Size();
} else {
- ASSERT_MSG(BufferDescriptorC().size() > buffer_index,
- "BufferDescriptorC invalid buffer_index {}", buffer_index);
+ ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index, { return 0; },
+ "BufferDescriptorC invalid buffer_index {}", buffer_index);
return BufferDescriptorC()[buffer_index].Size();
}
+ return 0;
}
std::string HLERequestContext::Description() const {
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index af3330297..b31673928 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -19,6 +19,10 @@
union ResultCode;
+namespace Core::Memory {
+class Memory;
+}
+
namespace Service {
class ServiceFrameworkBase;
}
@@ -28,6 +32,7 @@ namespace Kernel {
class Domain;
class HandleTable;
class HLERequestContext;
+class KernelCore;
class Process;
class ServerSession;
class Thread;
@@ -98,7 +103,8 @@ protected:
*/
class HLERequestContext {
public:
- explicit HLERequestContext(std::shared_ptr<ServerSession> session,
+ explicit HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
+ std::shared_ptr<ServerSession> session,
std::shared_ptr<Thread> thread);
~HLERequestContext();
@@ -305,6 +311,9 @@ private:
std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers;
bool is_thread_waiting{};
+
+ KernelCore& kernel;
+ Core::Memory::Memory& memory;
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 7655382fa..1f2af7a1b 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <atomic>
#include <bitset>
#include <functional>
@@ -13,11 +14,15 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/thread.h"
#include "core/arm/arm_interface.h"
+#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
#include "core/device_memory.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/client_port.h"
@@ -39,85 +44,28 @@
#include "core/hle/result.h"
#include "core/memory.h"
-namespace Kernel {
-
-/**
- * Callback that will wake up the thread it was scheduled for
- * @param thread_handle The handle of the thread that's been awoken
- * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time
- */
-static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
- const auto proper_handle = static_cast<Handle>(thread_handle);
- const auto& system = Core::System::GetInstance();
-
- // Lock the global kernel mutex when we enter the kernel HLE.
- std::lock_guard lock{HLE::g_hle_lock};
-
- std::shared_ptr<Thread> thread =
- system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
- if (thread == nullptr) {
- LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
- return;
- }
-
- bool resume = true;
-
- if (thread->GetStatus() == ThreadStatus::WaitSynch ||
- thread->GetStatus() == ThreadStatus::WaitHLEEvent) {
- // Remove the thread from each of its waiting objects' waitlists
- for (const auto& object : thread->GetSynchronizationObjects()) {
- object->RemoveWaitingThread(thread);
- }
- thread->ClearSynchronizationObjects();
-
- // Invoke the wakeup callback before clearing the wait objects
- if (thread->HasWakeupCallback()) {
- resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0);
- }
- } else if (thread->GetStatus() == ThreadStatus::WaitMutex ||
- thread->GetStatus() == ThreadStatus::WaitCondVar) {
- thread->SetMutexWaitAddress(0);
- thread->SetWaitHandle(0);
- if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
- thread->GetOwnerProcess()->RemoveConditionVariableThread(thread);
- thread->SetCondVarWaitAddress(0);
- }
-
- auto* const lock_owner = thread->GetLockOwner();
- // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance
- // and don't have a lock owner unless SignalProcessWideKey was called first and the thread
- // wasn't awakened due to the mutex already being acquired.
- if (lock_owner != nullptr) {
- lock_owner->RemoveMutexWaiter(thread);
- }
- }
+MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
- if (thread->GetStatus() == ThreadStatus::WaitArb) {
- auto& address_arbiter = thread->GetOwnerProcess()->GetAddressArbiter();
- address_arbiter.HandleWakeupThread(thread);
- }
-
- if (resume) {
- if (thread->GetStatus() == ThreadStatus::WaitCondVar ||
- thread->GetStatus() == ThreadStatus::WaitArb) {
- thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
- }
- thread->ResumeFromWait();
- }
-}
+namespace Kernel {
struct KernelCore::Impl {
explicit Impl(Core::System& system, KernelCore& kernel)
: global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
+    // Records whether emulation runs in multicore mode. The flag is read by
+    // RegisterCoreThread() and GetCurrentHostThreadID().
+    void SetMulticore(bool is_multicore) {
+        this->is_multicore = is_multicore;
+    }
+
void Initialize(KernelCore& kernel) {
Shutdown();
+ RegisterHostThread();
InitializePhysicalCores();
InitializeSystemResourceLimit(kernel);
InitializeMemoryLayout();
- InitializeThreads();
- InitializePreemption();
+ InitializePreemption(kernel);
+ InitializeSchedulers();
+ InitializeSuspendThreads();
}
void Shutdown() {
@@ -126,13 +74,26 @@ struct KernelCore::Impl {
next_user_process_id = Process::ProcessIDMin;
next_thread_id = 1;
+ for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+ if (suspend_threads[i]) {
+ suspend_threads[i].reset();
+ }
+ }
+
+ for (std::size_t i = 0; i < cores.size(); i++) {
+ cores[i].Shutdown();
+ schedulers[i].reset();
+ }
+ cores.clear();
+
+ registered_core_threads.reset();
+
process_list.clear();
current_process = nullptr;
system_resource_limit = nullptr;
global_handle_table.Clear();
- thread_wakeup_event_type = nullptr;
preemption_event = nullptr;
global_scheduler.Shutdown();
@@ -145,13 +106,21 @@ struct KernelCore::Impl {
cores.clear();
exclusive_monitor.reset();
+ host_thread_ids.clear();
}
void InitializePhysicalCores() {
exclusive_monitor =
Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
- cores.emplace_back(system, i, *exclusive_monitor);
+ schedulers[i] = std::make_unique<Kernel::Scheduler>(system, i);
+ cores.emplace_back(system, i, *schedulers[i], interrupts[i]);
+ }
+ }
+
+ void InitializeSchedulers() {
+ for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+ cores[i].Scheduler().Initialize();
}
}
@@ -173,15 +142,13 @@ struct KernelCore::Impl {
}
}
- void InitializeThreads() {
- thread_wakeup_event_type =
- Core::Timing::CreateEvent("ThreadWakeupCallback", ThreadWakeupCallback);
- }
-
- void InitializePreemption() {
- preemption_event =
- Core::Timing::CreateEvent("PreemptionCallback", [this](u64 userdata, s64 cycles_late) {
- global_scheduler.PreemptThreads();
+ void InitializePreemption(KernelCore& kernel) {
+ preemption_event = Core::Timing::CreateEvent(
+ "PreemptionCallback", [this, &kernel](u64 userdata, s64 cycles_late) {
+ {
+ SchedulerLock lock(kernel);
+ global_scheduler.PreemptThreads();
+ }
s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10));
system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
});
@@ -190,6 +157,20 @@ struct KernelCore::Impl {
system.CoreTiming().ScheduleEvent(time_interval, preemption_event);
}
+    // Creates one suspend thread per emulated core and stores it in suspend_threads.
+    void InitializeSuspendThreads() {
+        // The type flags are identical for every suspend thread, so build them once.
+        const auto suspend_type =
+            static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_SUSPEND);
+
+        for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; ++core) {
+            std::string thread_name = "Suspend Thread Id:" + std::to_string(core);
+            std::function<void(void*)> start_func =
+                system.GetCpuManager().GetSuspendThreadStartFunc();
+            void* const start_param = system.GetCpuManager().GetStartFuncParamater();
+
+            // The thread is bound to the core it will suspend via the core index argument.
+            auto created = Thread::Create(system, suspend_type, thread_name, 0, 0, 0,
+                                          static_cast<u32>(core), 0, nullptr,
+                                          std::move(start_func), start_param);
+            suspend_threads[core] = std::move(created).Unwrap();
+        }
+    }
+
void MakeCurrentProcess(Process* process) {
current_process = process;
@@ -197,15 +178,17 @@ struct KernelCore::Impl {
return;
}
- for (auto& core : cores) {
- core.SetIs64Bit(process->Is64BitProcess());
+ u32 core_id = GetCurrentHostThreadID();
+ if (core_id < Core::Hardware::NUM_CPU_CORES) {
+ system.Memory().SetCurrentPageTable(*process, core_id);
}
-
- system.Memory().SetCurrentPageTable(*process);
}
void RegisterCoreThread(std::size_t core_id) {
std::unique_lock lock{register_thread_mutex};
+ if (!is_multicore) {
+ single_core_thread_id = std::this_thread::get_id();
+ }
const std::thread::id this_id = std::this_thread::get_id();
const auto it = host_thread_ids.find(this_id);
ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
@@ -219,12 +202,19 @@ struct KernelCore::Impl {
std::unique_lock lock{register_thread_mutex};
const std::thread::id this_id = std::this_thread::get_id();
const auto it = host_thread_ids.find(this_id);
- ASSERT(it == host_thread_ids.end());
+ if (it != host_thread_ids.end()) {
+ return;
+ }
host_thread_ids[this_id] = registered_thread_ids++;
}
u32 GetCurrentHostThreadID() const {
const std::thread::id this_id = std::this_thread::get_id();
+ if (!is_multicore) {
+ if (single_core_thread_id == this_id) {
+ return static_cast<u32>(system.GetCpuManager().CurrentCore());
+ }
+ }
const auto it = host_thread_ids.find(this_id);
if (it == host_thread_ids.end()) {
return Core::INVALID_HOST_THREAD_ID;
@@ -240,7 +230,7 @@ struct KernelCore::Impl {
}
const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();
const Kernel::Thread* current = sched.GetCurrentThread();
- if (current != nullptr) {
+ if (current != nullptr && !current->IsPhantomMode()) {
result.guest_handle = current->GetGlobalHandle();
} else {
result.guest_handle = InvalidHandle;
@@ -313,7 +303,6 @@ struct KernelCore::Impl {
std::shared_ptr<ResourceLimit> system_resource_limit;
- std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;
std::shared_ptr<Core::Timing::EventType> preemption_event;
// This is the kernel's handle table or supervisor handle table which
@@ -343,6 +332,15 @@ struct KernelCore::Impl {
std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
+ std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
+ std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
+ std::array<std::unique_ptr<Kernel::Scheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
+
+ bool is_multicore{};
+ std::thread::id single_core_thread_id{};
+
+ std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
+
// System context
Core::System& system;
};
@@ -352,6 +350,10 @@ KernelCore::~KernelCore() {
Shutdown();
}
+// Forwards the multicore/single-core selection to the kernel implementation.
+void KernelCore::SetMulticore(bool is_multicore) {
+    impl->SetMulticore(is_multicore);
+}
+
void KernelCore::Initialize() {
impl->Initialize(*this);
}
@@ -397,11 +399,11 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
}
Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) {
- return impl->cores[id].Scheduler();
+ return *impl->schedulers[id];
}
const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const {
- return impl->cores[id].Scheduler();
+ return *impl->schedulers[id];
}
Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {
@@ -412,6 +414,39 @@ const Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) const {
return impl->cores[id];
}
+// Returns the PhysicalCore indexed by the calling host thread's id.
+// Asserts that the id is a valid emulated core index.
+Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() {
+    const u32 current_core = impl->GetCurrentHostThreadID();
+    ASSERT(current_core < Core::Hardware::NUM_CPU_CORES);
+    return impl->cores[current_core];
+}
+
+// Const overload: returns the PhysicalCore indexed by the calling host thread's id.
+// Asserts that the id is a valid emulated core index.
+const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const {
+    const u32 current_core = impl->GetCurrentHostThreadID();
+    ASSERT(current_core < Core::Hardware::NUM_CPU_CORES);
+    return impl->cores[current_core];
+}
+
+// Returns the Scheduler indexed by the calling host thread's id.
+// Asserts that the id is a valid emulated core index.
+Kernel::Scheduler& KernelCore::CurrentScheduler() {
+    const u32 current_core = impl->GetCurrentHostThreadID();
+    ASSERT(current_core < Core::Hardware::NUM_CPU_CORES);
+    return *impl->schedulers[current_core];
+}
+
+// Const overload: returns the Scheduler indexed by the calling host thread's id.
+// Asserts that the id is a valid emulated core index.
+const Kernel::Scheduler& KernelCore::CurrentScheduler() const {
+    const u32 current_core = impl->GetCurrentHostThreadID();
+    ASSERT(current_core < Core::Hardware::NUM_CPU_CORES);
+    return *impl->schedulers[current_core];
+}
+
+// Exposes the per-core interrupt handler array owned by the kernel implementation.
+std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts() {
+    return impl->interrupts;
+}
+
+// Const overload: exposes the per-core interrupt handler array read-only.
+const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& KernelCore::Interrupts()
+    const {
+    return impl->interrupts;
+}
+
Kernel::Synchronization& KernelCore::Synchronization() {
return impl->synchronization;
}
@@ -437,15 +472,17 @@ const Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() const {
}
void KernelCore::InvalidateAllInstructionCaches() {
- for (std::size_t i = 0; i < impl->global_scheduler.CpuCoresCount(); i++) {
- PhysicalCore(i).ArmInterface().ClearInstructionCache();
+ auto& threads = GlobalScheduler().GetThreadList();
+ for (auto& thread : threads) {
+ if (!thread->IsHLEThread()) {
+ auto& arm_interface = thread->ArmInterface();
+ arm_interface.ClearInstructionCache();
+ }
}
}
void KernelCore::PrepareReschedule(std::size_t id) {
- if (id < impl->global_scheduler.CpuCoresCount()) {
- impl->cores[id].Stop();
- }
+ // TODO: Reimplement this.
}
void KernelCore::AddNamedPort(std::string name, std::shared_ptr<ClientPort> port) {
@@ -481,10 +518,6 @@ u64 KernelCore::CreateNewUserProcessID() {
return impl->next_user_process_id++;
}
-const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallbackEventType() const {
- return impl->thread_wakeup_event_type;
-}
-
Kernel::HandleTable& KernelCore::GlobalHandleTable() {
return impl->global_handle_table;
}
@@ -557,4 +590,34 @@ const Kernel::SharedMemory& KernelCore::GetTimeSharedMem() const {
return *impl->time_shared_mem;
}
+// Switches every per-core suspend thread to Ready (suspending) or WaitSleep (resuming).
+// Once exception_exited has been set, the suspended state is applied regardless of
+// the value of in_suspention.
+void KernelCore::Suspend(bool in_suspention) {
+    const bool suspend_requested = in_suspention || exception_exited;
+    const ThreadStatus target_status =
+        suspend_requested ? ThreadStatus::Ready : ThreadStatus::WaitSleep;
+
+    // The status updates are performed while the scheduler lock is held.
+    SchedulerLock lock(*this);
+    for (const auto& suspend_thread : impl->suspend_threads) {
+        suspend_thread->SetStatus(target_status);
+    }
+}
+
+// Reports the multicore flag previously stored through SetMulticore().
+bool KernelCore::IsMulticore() const {
+    return impl->is_multicore;
+}
+
+// Latches the exceptional-exit flag and then suspends. Because Suspend() ORs the
+// flag into its decision, later Suspend(false) calls keep the suspended state.
+void KernelCore::ExceptionalExit() {
+    exception_exited = true;
+    Suspend(true);
+}
+
+// Opens the Kernel_SVC MicroProfile scope for the calling core and stores the returned
+// tick in the core's svc_ticks slot so that ExitSVCProfile() can close the scope.
+void KernelCore::EnterSVCProfile() {
+    const std::size_t core = impl->GetCurrentHostThreadID();
+    // GetCurrentHostThreadID() returns Core::INVALID_HOST_THREAD_ID for an unregistered
+    // host thread, and svc_ticks only has one slot per emulated core. Skip profiling
+    // instead of writing past the end of the array.
+    if (core >= impl->svc_ticks.size()) {
+        return;
+    }
+    impl->svc_ticks[core] = MicroProfileEnter(MICROPROFILE_TOKEN(Kernel_SVC));
+}
+
+// Closes the Kernel_SVC MicroProfile scope for the calling core using the tick stored
+// in the core's svc_ticks slot.
+void KernelCore::ExitSVCProfile() {
+    const std::size_t core = impl->GetCurrentHostThreadID();
+    // GetCurrentHostThreadID() returns Core::INVALID_HOST_THREAD_ID for an unregistered
+    // host thread; there is no tick slot to read for such a thread, so do nothing
+    // rather than index past the end of svc_ticks.
+    if (core >= impl->svc_ticks.size()) {
+        return;
+    }
+    MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 83de1f542..49bd47e89 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -4,15 +4,17 @@
#pragma once
+#include <array>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
+#include "core/hardware_properties.h"
#include "core/hle/kernel/memory/memory_types.h"
#include "core/hle/kernel/object.h"
namespace Core {
-struct EmuThreadHandle;
+class CPUInterruptHandler;
class ExclusiveMonitor;
class System;
} // namespace Core
@@ -65,6 +67,9 @@ public:
KernelCore(KernelCore&&) = delete;
KernelCore& operator=(KernelCore&&) = delete;
+ /// Sets if emulation is multicore or single core, must be set before Initialize
+ void SetMulticore(bool is_multicore);
+
/// Resets the kernel to a clean slate for use.
void Initialize();
@@ -110,6 +115,18 @@ public:
/// Gets the an instance of the respective physical CPU core.
const Kernel::PhysicalCore& PhysicalCore(std::size_t id) const;
+ /// Gets the sole instance of the Scheduler at the current running core.
+ Kernel::Scheduler& CurrentScheduler();
+
+ /// Gets the sole instance of the Scheduler at the current running core.
+ const Kernel::Scheduler& CurrentScheduler() const;
+
+ /// Gets the instance of the current physical CPU core.
+ Kernel::PhysicalCore& CurrentPhysicalCore();
+
+ /// Gets the instance of the current physical CPU core.
+ const Kernel::PhysicalCore& CurrentPhysicalCore() const;
+
/// Gets the an instance of the Synchronization Interface.
Kernel::Synchronization& Synchronization();
@@ -129,6 +146,10 @@ public:
const Core::ExclusiveMonitor& GetExclusiveMonitor() const;
+ std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts();
+
+ const std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>& Interrupts() const;
+
void InvalidateAllInstructionCaches();
/// Adds a port to the named port table
@@ -191,6 +212,18 @@ public:
/// Gets the shared memory object for Time services.
const Kernel::SharedMemory& GetTimeSharedMem() const;
+ /// Suspend/unsuspend the OS.
+ void Suspend(bool in_suspention);
+
+ /// Performs an exceptional exit of the OS.
+ void ExceptionalExit();
+
+ bool IsMulticore() const;
+
+ void EnterSVCProfile();
+
+ void ExitSVCProfile();
+
private:
friend class Object;
friend class Process;
@@ -208,9 +241,6 @@ private:
/// Creates a new thread ID, incrementing the internal thread ID counter.
u64 CreateNewThreadID();
- /// Retrieves the event type used for thread wakeup callbacks.
- const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const;
-
/// Provides a reference to the global handle table.
Kernel::HandleTable& GlobalHandleTable();
@@ -219,6 +249,7 @@ private:
struct Impl;
std::unique_ptr<Impl> impl;
+ bool exception_exited{};
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index 6b432e1b2..acf13585c 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
// Ensure that we don't leave anything un-freed
auto group_guard = detail::ScopeExit([&] {
for (const auto& it : page_list.Nodes()) {
- const auto min_num_pages{std::min(
+ const auto min_num_pages{std::min<size_t>(
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
chosen_manager.Free(it.GetAddress(), min_num_pages);
}
@@ -139,7 +139,6 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
}
// Only succeed if we allocated as many pages as we wanted
- ASSERT(num_pages >= 0);
if (num_pages) {
return ERR_OUT_OF_MEMORY;
}
@@ -165,7 +164,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,
// Free all of the pages
for (const auto& it : page_list.Nodes()) {
- const auto min_num_pages{std::min(
+ const auto min_num_pages{std::min<size_t>(
it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
chosen_manager.Free(it.GetAddress(), min_num_pages);
}
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index 7869eb32b..8f6c944d1 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -34,8 +34,6 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
if (thread->GetMutexWaitAddress() != mutex_addr)
continue;
- ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
-
++num_waiters;
if (highest_priority_thread == nullptr ||
thread->GetPriority() < highest_priority_thread->GetPriority()) {
@@ -49,6 +47,7 @@ static std::pair<std::shared_ptr<Thread>, u32> GetHighestPriorityMutexWaitingThr
/// Update the mutex owner field of all threads waiting on the mutex to point to the new owner.
static void TransferMutexOwnership(VAddr mutex_addr, std::shared_ptr<Thread> current_thread,
std::shared_ptr<Thread> new_owner) {
+ current_thread->RemoveMutexWaiter(new_owner);
const auto threads = current_thread->GetMutexWaitingThreads();
for (const auto& thread : threads) {
if (thread->GetMutexWaitAddress() != mutex_addr)
@@ -72,85 +71,100 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
return ERR_INVALID_ADDRESS;
}
- const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
+ auto& kernel = system.Kernel();
std::shared_ptr<Thread> current_thread =
- SharedFrom(system.CurrentScheduler().GetCurrentThread());
- std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
- std::shared_ptr<Thread> requesting_thread = handle_table.Get<Thread>(requesting_thread_handle);
+ SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
+ {
+ SchedulerLock lock(kernel);
+ // The mutex address must be 4-byte aligned
+ if ((address % sizeof(u32)) != 0) {
+ return ERR_INVALID_ADDRESS;
+ }
- // TODO(Subv): It is currently unknown if it is possible to lock a mutex in behalf of another
- // thread.
- ASSERT(requesting_thread == current_thread);
+ const auto& handle_table = kernel.CurrentProcess()->GetHandleTable();
+ std::shared_ptr<Thread> holding_thread = handle_table.Get<Thread>(holding_thread_handle);
+ std::shared_ptr<Thread> requesting_thread =
+ handle_table.Get<Thread>(requesting_thread_handle);
- const u32 addr_value = system.Memory().Read32(address);
+ // TODO(Subv): It is currently unknown if it is possible to lock a mutex on behalf of
+ // another thread.
+ ASSERT(requesting_thread == current_thread);
- // If the mutex isn't being held, just return success.
- if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
- return RESULT_SUCCESS;
- }
+ current_thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
- if (holding_thread == nullptr) {
- LOG_ERROR(Kernel, "Holding thread does not exist! thread_handle={:08X}",
- holding_thread_handle);
- return ERR_INVALID_HANDLE;
- }
+ const u32 addr_value = system.Memory().Read32(address);
+
+ // If the mutex isn't being held, just return success.
+ if (addr_value != (holding_thread_handle | Mutex::MutexHasWaitersFlag)) {
+ return RESULT_SUCCESS;
+ }
- // Wait until the mutex is released
- current_thread->SetMutexWaitAddress(address);
- current_thread->SetWaitHandle(requesting_thread_handle);
+ if (holding_thread == nullptr) {
+ return ERR_INVALID_HANDLE;
+ }
- current_thread->SetStatus(ThreadStatus::WaitMutex);
- current_thread->InvalidateWakeupCallback();
+ // Wait until the mutex is released
+ current_thread->SetMutexWaitAddress(address);
+ current_thread->SetWaitHandle(requesting_thread_handle);
- // Update the lock holder thread's priority to prevent priority inversion.
- holding_thread->AddMutexWaiter(current_thread);
+ current_thread->SetStatus(ThreadStatus::WaitMutex);
- system.PrepareReschedule();
+ // Update the lock holder thread's priority to prevent priority inversion.
+ holding_thread->AddMutexWaiter(current_thread);
+ }
- return RESULT_SUCCESS;
+ {
+ SchedulerLock lock(kernel);
+ auto* owner = current_thread->GetLockOwner();
+ if (owner != nullptr) {
+ owner->RemoveMutexWaiter(current_thread);
+ }
+ }
+ return current_thread->GetSignalingResult();
}
-ResultCode Mutex::Release(VAddr address) {
+std::pair<ResultCode, std::shared_ptr<Thread>> Mutex::Unlock(std::shared_ptr<Thread> owner,
+ VAddr address) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
LOG_ERROR(Kernel, "Address is not 4-byte aligned! address={:016X}", address);
- return ERR_INVALID_ADDRESS;
+ return {ERR_INVALID_ADDRESS, nullptr};
}
- std::shared_ptr<Thread> current_thread =
- SharedFrom(system.CurrentScheduler().GetCurrentThread());
- auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(current_thread, address);
-
- // There are no more threads waiting for the mutex, release it completely.
- if (thread == nullptr) {
+ auto [new_owner, num_waiters] = GetHighestPriorityMutexWaitingThread(owner, address);
+ if (new_owner == nullptr) {
system.Memory().Write32(address, 0);
- return RESULT_SUCCESS;
+ return {RESULT_SUCCESS, nullptr};
}
-
// Transfer the ownership of the mutex from the previous owner to the new one.
- TransferMutexOwnership(address, current_thread, thread);
-
- u32 mutex_value = thread->GetWaitHandle();
-
+ TransferMutexOwnership(address, owner, new_owner);
+ u32 mutex_value = new_owner->GetWaitHandle();
if (num_waiters >= 2) {
// Notify the guest that there are still some threads waiting for the mutex
mutex_value |= Mutex::MutexHasWaitersFlag;
}
+ new_owner->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+ new_owner->SetLockOwner(nullptr);
+ new_owner->ResumeFromWait();
- // Grant the mutex to the next waiting thread and resume it.
system.Memory().Write32(address, mutex_value);
+ return {RESULT_SUCCESS, new_owner};
+}
- ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex);
- thread->ResumeFromWait();
+ResultCode Mutex::Release(VAddr address) {
+ auto& kernel = system.Kernel();
+ SchedulerLock lock(kernel);
- thread->SetLockOwner(nullptr);
- thread->SetCondVarWaitAddress(0);
- thread->SetMutexWaitAddress(0);
- thread->SetWaitHandle(0);
- thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
+ std::shared_ptr<Thread> current_thread =
+ SharedFrom(kernel.CurrentScheduler().GetCurrentThread());
- system.PrepareReschedule();
+ auto [result, new_owner] = Unlock(current_thread, address);
- return RESULT_SUCCESS;
+ if (result != RESULT_SUCCESS && new_owner != nullptr) {
+ new_owner->SetSynchronizationResults(nullptr, result);
+ }
+
+ return result;
}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
index b904de2e8..3b81dc3df 100644
--- a/src/core/hle/kernel/mutex.h
+++ b/src/core/hle/kernel/mutex.h
@@ -28,6 +28,10 @@ public:
ResultCode TryAcquire(VAddr address, Handle holding_thread_handle,
Handle requesting_thread_handle);
+ /// Unlocks a mutex for owner at address
+ std::pair<ResultCode, std::shared_ptr<Thread>> Unlock(std::shared_ptr<Thread> owner,
+ VAddr address);
+
/// Releases the mutex at the specified address.
ResultCode Release(VAddr address);
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index a15011076..c6bbdb080 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -2,12 +2,15 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/spin_lock.h"
#include "core/arm/arm_interface.h"
#ifdef ARCHITECTURE_x86_64
#include "core/arm/dynarmic/arm_dynarmic_32.h"
#include "core/arm/dynarmic/arm_dynarmic_64.h"
#endif
+#include "core/arm/cpu_interrupt_handler.h"
#include "core/arm/exclusive_monitor.h"
#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
@@ -17,50 +20,37 @@
namespace Kernel {
-PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
- Core::ExclusiveMonitor& exclusive_monitor)
- : core_index{id} {
-#ifdef ARCHITECTURE_x86_64
- arm_interface_32 =
- std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index);
- arm_interface_64 =
- std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);
-
-#else
- using Core::ARM_Unicorn;
- arm_interface_32 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch32);
- arm_interface_64 = std::make_unique<ARM_Unicorn>(system, ARM_Unicorn::Arch::AArch64);
- LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
+PhysicalCore::PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
+ Core::CPUInterruptHandler& interrupt_handler)
+ : interrupt_handler{interrupt_handler}, core_index{id}, scheduler{scheduler} {
- scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
+ guard = std::make_unique<Common::SpinLock>();
}
PhysicalCore::~PhysicalCore() = default;
-void PhysicalCore::Run() {
- arm_interface->Run();
- arm_interface->ClearExclusiveState();
+void PhysicalCore::Idle() {
+ interrupt_handler.AwaitInterrupt();
}
-void PhysicalCore::Step() {
- arm_interface->Step();
+void PhysicalCore::Shutdown() {
+ scheduler.Shutdown();
}
-void PhysicalCore::Stop() {
- arm_interface->PrepareReschedule();
+bool PhysicalCore::IsInterrupted() const {
+ return interrupt_handler.IsInterrupted();
}
-void PhysicalCore::Shutdown() {
- scheduler->Shutdown();
+void PhysicalCore::Interrupt() {
+ guard->lock();
+ interrupt_handler.SetInterrupt(true);
+ guard->unlock();
}
-void PhysicalCore::SetIs64Bit(bool is_64_bit) {
- if (is_64_bit) {
- arm_interface = arm_interface_64.get();
- } else {
- arm_interface = arm_interface_32.get();
- }
+void PhysicalCore::ClearInterrupt() {
+ guard->lock();
+ interrupt_handler.SetInterrupt(false);
+ guard->unlock();
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 3269166be..d7a7a951c 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -7,12 +7,17 @@
#include <cstddef>
#include <memory>
+namespace Common {
+class SpinLock;
+}
+
namespace Kernel {
class Scheduler;
} // namespace Kernel
namespace Core {
class ARM_Interface;
+class CPUInterruptHandler;
class ExclusiveMonitor;
class System;
} // namespace Core
@@ -21,7 +26,8 @@ namespace Kernel {
class PhysicalCore {
public:
- PhysicalCore(Core::System& system, std::size_t id, Core::ExclusiveMonitor& exclusive_monitor);
+ PhysicalCore(Core::System& system, std::size_t id, Kernel::Scheduler& scheduler,
+ Core::CPUInterruptHandler& interrupt_handler);
~PhysicalCore();
PhysicalCore(const PhysicalCore&) = delete;
@@ -30,23 +36,18 @@ public:
PhysicalCore(PhysicalCore&&) = default;
PhysicalCore& operator=(PhysicalCore&&) = default;
- /// Execute current jit state
- void Run();
- /// Execute a single instruction in current jit.
- void Step();
- /// Stop JIT execution/exit
- void Stop();
+ void Idle();
+ /// Interrupt this physical core.
+ void Interrupt();
- // Shutdown this physical core.
- void Shutdown();
+ /// Clear this core's interrupt
+ void ClearInterrupt();
- Core::ARM_Interface& ArmInterface() {
- return *arm_interface;
- }
+ /// Check if this core is interrupted
+ bool IsInterrupted() const;
- const Core::ARM_Interface& ArmInterface() const {
- return *arm_interface;
- }
+ // Shutdown this physical core.
+ void Shutdown();
bool IsMainCore() const {
return core_index == 0;
@@ -61,21 +62,18 @@ public:
}
Kernel::Scheduler& Scheduler() {
- return *scheduler;
+ return scheduler;
}
const Kernel::Scheduler& Scheduler() const {
- return *scheduler;
+ return scheduler;
}
- void SetIs64Bit(bool is_64_bit);
-
private:
+ Core::CPUInterruptHandler& interrupt_handler;
std::size_t core_index;
- std::unique_ptr<Core::ARM_Interface> arm_interface_32;
- std::unique_ptr<Core::ARM_Interface> arm_interface_64;
- std::unique_ptr<Kernel::Scheduler> scheduler;
- Core::ARM_Interface* arm_interface{};
+ Kernel::Scheduler& scheduler;
+ std::unique_ptr<Common::SpinLock> guard;
};
} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..f9d7c024d 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -22,6 +22,7 @@
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/lock.h"
#include "core/memory.h"
#include "core/settings.h"
@@ -30,14 +31,15 @@ namespace {
/**
* Sets up the primary application thread
*
+ * @param system The system instance to create the main thread under.
* @param owner_process The parent process for the main thread
- * @param kernel The kernel instance to create the main thread under.
* @param priority The priority to give the main thread
*/
-void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, VAddr stack_top) {
+void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) {
const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart();
- auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
- owner_process.GetIdealCore(), stack_top, owner_process);
+ ThreadType type = THREADTYPE_USER;
+ auto thread_res = Thread::Create(system, type, "main", entry_point, priority, 0,
+ owner_process.GetIdealCore(), stack_top, &owner_process);
std::shared_ptr<Thread> thread = std::move(thread_res).Unwrap();
@@ -48,8 +50,12 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority, V
thread->GetContext32().cpu_registers[1] = thread_handle;
thread->GetContext64().cpu_registers[1] = thread_handle;
+ auto& kernel = system.Kernel();
// Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
- thread->ResumeFromWait();
+ {
+ SchedulerLock lock{kernel};
+ thread->SetStatus(ThreadStatus::Ready);
+ }
}
} // Anonymous namespace
@@ -132,7 +138,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
u64 Process::GetTotalPhysicalMemoryAvailable() const {
const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
- page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};
+ page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
+ main_thread_stack_size};
if (capacity < memory_usage_capacity) {
return capacity;
@@ -146,7 +153,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
}
u64 Process::GetTotalPhysicalMemoryUsed() const {
- return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
+ return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
+ GetSystemResourceSize();
}
u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
@@ -180,7 +188,6 @@ void Process::RemoveConditionVariableThread(std::shared_ptr<Thread> thread) {
}
++it;
}
- UNREACHABLE();
}
std::vector<std::shared_ptr<Thread>> Process::GetConditionVariableThreads(
@@ -205,6 +212,7 @@ void Process::UnregisterThread(const Thread* thread) {
}
ResultCode Process::ClearSignalState() {
+ SchedulerLock lock(system.Kernel());
if (status == ProcessStatus::Exited) {
LOG_ERROR(Kernel, "called on a terminated process instance.");
return ERR_INVALID_STATE;
@@ -292,7 +300,7 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) {
ChangeStatus(ProcessStatus::Running);
- SetupMainThread(*this, kernel, main_thread_priority, main_thread_stack_top);
+ SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top);
resource_limit->Reserve(ResourceType::Threads, 1);
resource_limit->Reserve(ResourceType::PhysicalMemory, main_thread_stack_size);
}
@@ -338,6 +346,7 @@ static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
}
VAddr Process::CreateTLSRegion() {
+ SchedulerLock lock(system.Kernel());
if (auto tls_page_iter{FindTLSPageWithAvailableSlots(tls_pages)};
tls_page_iter != tls_pages.cend()) {
return *tls_page_iter->ReserveSlot();
@@ -368,6 +377,7 @@ VAddr Process::CreateTLSRegion() {
}
void Process::FreeTLSRegion(VAddr tls_address) {
+ SchedulerLock lock(system.Kernel());
const VAddr aligned_address = Common::AlignDown(tls_address, Core::Memory::PAGE_SIZE);
auto iter =
std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
@@ -382,6 +392,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet code_set, VAddr base_addr) {
+ std::lock_guard lock{HLE::g_hle_lock};
const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
Memory::MemoryPermission permission) {
page_table->SetCodeMemoryPermission(segment.addr + base_addr, segment.size, permission);
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 00860fcbd..6e286419e 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -6,8 +6,10 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/readable_event.h"
+#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
namespace Kernel {
@@ -37,8 +39,9 @@ void ReadableEvent::Clear() {
}
ResultCode ReadableEvent::Reset() {
+ SchedulerLock lock(kernel);
if (!is_signaled) {
- LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
+ LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
GetObjectId(), GetTypeName(), GetName());
return ERR_INVALID_STATE;
}
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
const std::size_t index{ResourceTypeToIndex(resource)};
s64 new_value = current[index] + amount;
- while (new_value > limit[index] && available[index] + amount <= limit[index]) {
+ if (new_value > limit[index] && available[index] + amount <= limit[index]) {
// TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
new_value = current[index] + amount;
-
- if (timeout >= 0) {
- break;
- }
}
if (new_value <= limit[index]) {
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 1140c72a3..2b12c0dbf 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -11,11 +11,15 @@
#include <utility>
#include "common/assert.h"
+#include "common/bit_util.h"
+#include "common/fiber.h"
#include "common/logging/log.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/cpu_manager.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/time_manager.h"
@@ -27,103 +31,151 @@ GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
GlobalScheduler::~GlobalScheduler() = default;
void GlobalScheduler::AddThread(std::shared_ptr<Thread> thread) {
+ global_list_guard.lock();
thread_list.push_back(std::move(thread));
+ global_list_guard.unlock();
}
void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
+ global_list_guard.lock();
thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
thread_list.end());
+ global_list_guard.unlock();
}
-void GlobalScheduler::UnloadThread(std::size_t core) {
- Scheduler& sched = kernel.Scheduler(core);
- sched.UnloadThread();
-}
-
-void GlobalScheduler::SelectThread(std::size_t core) {
+u32 GlobalScheduler::SelectThreads() {
+ ASSERT(is_locked);
const auto update_thread = [](Thread* thread, Scheduler& sched) {
- if (thread != sched.selected_thread.get()) {
+ sched.guard.lock();
+ if (thread != sched.selected_thread_set.get()) {
if (thread == nullptr) {
++sched.idle_selection_count;
}
- sched.selected_thread = SharedFrom(thread);
+ sched.selected_thread_set = SharedFrom(thread);
}
- sched.is_context_switch_pending = sched.selected_thread != sched.current_thread;
+ const bool reschedule_pending =
+ sched.is_context_switch_pending || (sched.selected_thread_set != sched.current_thread);
+ sched.is_context_switch_pending = reschedule_pending;
std::atomic_thread_fence(std::memory_order_seq_cst);
+ sched.guard.unlock();
+ return reschedule_pending;
};
- Scheduler& sched = kernel.Scheduler(core);
- Thread* current_thread = nullptr;
- // Step 1: Get top thread in schedule queue.
- current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
- if (current_thread) {
- update_thread(current_thread, sched);
- return;
+ if (!is_reselection_pending.load()) {
+ return 0;
}
- // Step 2: Try selecting a suggested thread.
- Thread* winner = nullptr;
- std::set<s32> sug_cores;
- for (auto thread : suggested_queue[core]) {
- s32 this_core = thread->GetProcessorID();
- Thread* thread_on_core = nullptr;
- if (this_core >= 0) {
- thread_on_core = scheduled_queue[this_core].front();
- }
- if (this_core < 0 || thread != thread_on_core) {
- winner = thread;
- break;
+ std::array<Thread*, Core::Hardware::NUM_CPU_CORES> top_threads{};
+
+ u32 idle_cores{};
+
+ // Step 1: Get top thread in schedule queue.
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ Thread* top_thread =
+ scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
+ if (top_thread != nullptr) {
+ // TODO(Blinkhawk): Implement Thread Pinning
+ } else {
+ idle_cores |= (1ul << core);
}
- sug_cores.insert(this_core);
+ top_threads[core] = top_thread;
}
- // if we got a suggested thread, select it, else do a second pass.
- if (winner && winner->GetPriority() > 2) {
- if (winner->IsRunning()) {
- UnloadThread(static_cast<u32>(winner->GetProcessorID()));
+
+ while (idle_cores != 0) {
+ u32 core_id = Common::CountTrailingZeroes32(idle_cores);
+
+ if (!suggested_queue[core_id].empty()) {
+ std::array<s32, Core::Hardware::NUM_CPU_CORES> migration_candidates{};
+ std::size_t num_candidates = 0;
+ auto iter = suggested_queue[core_id].begin();
+ Thread* suggested = nullptr;
+ // Step 2: Try selecting a suggested thread.
+ while (iter != suggested_queue[core_id].end()) {
+ suggested = *iter;
+ iter++;
+ s32 suggested_core_id = suggested->GetProcessorID();
+ Thread* top_thread =
+ suggested_core_id >= 0 ? top_threads[suggested_core_id] : nullptr;
+ if (top_thread != suggested) {
+ if (top_thread != nullptr &&
+ top_thread->GetPriority() < THREADPRIO_MAX_CORE_MIGRATION) {
+ suggested = nullptr;
+ break;
+ // The top thread on that core has too high a priority for core migration; cancel
+ }
+ TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id), suggested);
+ break;
+ }
+ suggested = nullptr;
+ migration_candidates[num_candidates++] = suggested_core_id;
+ }
+ // Step 3: Select a suggested thread from another core
+ if (suggested == nullptr) {
+ for (std::size_t i = 0; i < num_candidates; i++) {
+ s32 candidate_core = migration_candidates[i];
+ suggested = top_threads[candidate_core];
+ auto it = scheduled_queue[candidate_core].begin();
+ it++;
+ Thread* next = it != scheduled_queue[candidate_core].end() ? *it : nullptr;
+ if (next != nullptr) {
+ TransferToCore(suggested->GetPriority(), static_cast<s32>(core_id),
+ suggested);
+ top_threads[candidate_core] = next;
+ break;
+ } else {
+ suggested = nullptr;
+ }
+ }
+ }
+ top_threads[core_id] = suggested;
}
- TransferToCore(winner->GetPriority(), static_cast<s32>(core), winner);
- update_thread(winner, sched);
- return;
+
+ idle_cores &= ~(1ul << core_id);
}
- // Step 3: Select a suggested thread from another core
- for (auto& src_core : sug_cores) {
- auto it = scheduled_queue[src_core].begin();
- it++;
- if (it != scheduled_queue[src_core].end()) {
- Thread* thread_on_core = scheduled_queue[src_core].front();
- Thread* to_change = *it;
- if (thread_on_core->IsRunning() || to_change->IsRunning()) {
- UnloadThread(static_cast<u32>(src_core));
- }
- TransferToCore(thread_on_core->GetPriority(), static_cast<s32>(core), thread_on_core);
- current_thread = thread_on_core;
- break;
+ u32 cores_needing_context_switch{};
+ for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+ Scheduler& sched = kernel.Scheduler(core);
+ ASSERT(top_threads[core] == nullptr || top_threads[core]->GetProcessorID() == core);
+ if (update_thread(top_threads[core], sched)) {
+ cores_needing_context_switch |= (1ul << core);
}
}
- update_thread(current_thread, sched);
+ return cores_needing_context_switch;
}
bool GlobalScheduler::YieldThread(Thread* yielding_thread) {
+ ASSERT(is_locked);
// Note: caller should use critical section, etc.
+ if (!yielding_thread->IsRunnable()) {
+ // Normally this case shouldn't happen except for SetThreadActivity.
+ is_reselection_pending.store(true, std::memory_order_release);
+ return false;
+ }
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
- const Thread* const winner = scheduled_queue[core_id].front(priority);
- ASSERT_MSG(yielding_thread == winner, "Thread yielding without being in front");
- scheduled_queue[core_id].yield(priority);
+ Reschedule(priority, core_id, yielding_thread);
+ const Thread* const winner = scheduled_queue[core_id].front();
+ if (kernel.GetCurrentHostThreadID() != core_id) {
+ is_reselection_pending.store(true, std::memory_order_release);
+ }
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
+ ASSERT(is_locked);
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
+ if (!yielding_thread->IsRunnable()) {
+ // Normally this case shouldn't happen except for SetThreadActivity.
+ is_reselection_pending.store(true, std::memory_order_release);
+ return false;
+ }
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
const u32 priority = yielding_thread->GetPriority();
// Yield the thread
- ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority),
- "Thread yielding without being in front");
- scheduled_queue[core_id].yield(priority);
+ Reschedule(priority, core_id, yielding_thread);
std::array<Thread*, Core::Hardware::NUM_CPU_CORES> current_threads;
for (std::size_t i = 0; i < current_threads.size(); i++) {
@@ -153,21 +205,28 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) {
if (winner != nullptr) {
if (winner != yielding_thread) {
- if (winner->IsRunning()) {
- UnloadThread(static_cast<u32>(winner->GetProcessorID()));
- }
TransferToCore(winner->GetPriority(), s32(core_id), winner);
}
} else {
winner = next_thread;
}
+ if (kernel.GetCurrentHostThreadID() != core_id) {
+ is_reselection_pending.store(true, std::memory_order_release);
+ }
+
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) {
+ ASSERT(is_locked);
// Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section,
// etc.
+ if (!yielding_thread->IsRunnable()) {
+ // Normally this case shouldn't happen except for SetThreadActivity.
+ is_reselection_pending.store(true, std::memory_order_release);
+ return false;
+ }
Thread* winner = nullptr;
const u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID());
@@ -195,25 +254,31 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread
}
if (winner != nullptr) {
if (winner != yielding_thread) {
- if (winner->IsRunning()) {
- UnloadThread(static_cast<u32>(winner->GetProcessorID()));
- }
TransferToCore(winner->GetPriority(), static_cast<s32>(core_id), winner);
}
} else {
winner = yielding_thread;
}
+ } else {
+ winner = scheduled_queue[core_id].front();
+ }
+
+ if (kernel.GetCurrentHostThreadID() != core_id) {
+ is_reselection_pending.store(true, std::memory_order_release);
}
return AskForReselectionOrMarkRedundant(yielding_thread, winner);
}
void GlobalScheduler::PreemptThreads() {
+ ASSERT(is_locked);
for (std::size_t core_id = 0; core_id < Core::Hardware::NUM_CPU_CORES; core_id++) {
const u32 priority = preemption_priorities[core_id];
if (scheduled_queue[core_id].size(priority) > 0) {
- scheduled_queue[core_id].front(priority)->IncrementYieldCount();
+ if (scheduled_queue[core_id].size(priority) > 1) {
+ scheduled_queue[core_id].front(priority)->IncrementYieldCount();
+ }
scheduled_queue[core_id].yield(priority);
if (scheduled_queue[core_id].size(priority) > 1) {
scheduled_queue[core_id].front(priority)->IncrementYieldCount();
@@ -247,9 +312,6 @@ void GlobalScheduler::PreemptThreads() {
}
if (winner != nullptr) {
- if (winner->IsRunning()) {
- UnloadThread(static_cast<u32>(winner->GetProcessorID()));
- }
TransferToCore(winner->GetPriority(), s32(core_id), winner);
current_thread =
winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread;
@@ -280,9 +342,6 @@ void GlobalScheduler::PreemptThreads() {
}
if (winner != nullptr) {
- if (winner->IsRunning()) {
- UnloadThread(static_cast<u32>(winner->GetProcessorID()));
- }
TransferToCore(winner->GetPriority(), s32(core_id), winner);
current_thread = winner;
}
@@ -292,34 +351,65 @@ void GlobalScheduler::PreemptThreads() {
}
}
+/// Interrupts the cores flagged for rescheduling and, when the thread that released the
+/// scheduler lock is a guest thread whose host handle is a valid core index, performs the
+/// context switch for the current scheduler inline instead of interrupting that core.
+///
+/// @param cores_pending_reschedule Bitmask of cores that need rescheduling (bit N = core N).
+/// @param global_thread            Handle of the thread that just released the scheduler lock.
+void GlobalScheduler::EnableInterruptAndSchedule(u32 cores_pending_reschedule,
+                                                 Core::EmuThreadHandle global_thread) {
+    const u32 current_core = global_thread.host_handle;
+    const bool must_context_switch = global_thread.guest_handle != InvalidHandle &&
+                                     (current_core < Core::Hardware::NUM_CPU_CORES);
+    while (cores_pending_reschedule != 0) {
+        const u32 core = Common::CountTrailingZeroes32(cores_pending_reschedule);
+        ASSERT(core < Core::Hardware::NUM_CPU_CORES);
+        // The calling core is not interrupted when it is going to switch context itself below.
+        if (!must_context_switch || core != current_core) {
+            kernel.PhysicalCore(core).Interrupt();
+        }
+        // Clear the bit that was just handled; 1U matches the width of the u32 mask.
+        cores_pending_reschedule &= ~(1U << core);
+    }
+    if (must_context_switch) {
+        auto& core_scheduler = kernel.CurrentScheduler();
+        kernel.ExitSVCProfile();
+        core_scheduler.TryDoContextSwitch();
+        kernel.EnterSVCProfile();
+    }
+}
+
/// Adds `thread` at `priority` to the suggested queue of `core`.
/// The scheduler lock must be held (asserted through `is_locked`).
void GlobalScheduler::Suggest(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
suggested_queue[core].add(thread, priority);
}
/// Removes `thread` at `priority` from the suggested queue of `core`.
/// The scheduler lock must be held (asserted through `is_locked`).
void GlobalScheduler::Unsuggest(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
suggested_queue[core].remove(thread, priority);
}
/// Adds `thread` at `priority` to the scheduled queue of `core`.
/// The thread must already be assigned to `core`, and the scheduler lock must be held.
void GlobalScheduler::Schedule(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority);
}
/// Adds `thread` at `priority` to the scheduled queue of `core`, passing `false` as the third
/// argument of `add` (the header documents this variant as inserting at the front).
/// The thread must already be assigned to `core`, and the scheduler lock must be held.
void GlobalScheduler::SchedulePrepend(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
ASSERT_MSG(thread->GetProcessorID() == s32(core), "Thread must be assigned to this core.");
scheduled_queue[core].add(thread, priority, false);
}
/// Removes `thread` from the scheduled queue of `core` at `priority` and adds it again at the
/// same priority. The scheduler lock must be held.
void GlobalScheduler::Reschedule(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
scheduled_queue[core].remove(thread, priority);
scheduled_queue[core].add(thread, priority);
}
/// Removes `thread` at `priority` from the scheduled queue of `core`.
/// The scheduler lock must be held (asserted through `is_locked`).
void GlobalScheduler::Unschedule(u32 priority, std::size_t core, Thread* thread) {
+ ASSERT(is_locked);
scheduled_queue[core].remove(thread, priority);
}
void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) {
+ ASSERT(is_locked);
const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT;
const s32 source_core = thread->GetProcessorID();
if (source_core == destination_core || !schedulable) {
@@ -349,6 +439,108 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread,
}
}
+/// Updates the scheduling queues after the scheduling status of `thread` changed.
+///
+/// A thread that stopped being runnable is removed from the scheduled queue of its core and
+/// from the suggested queues of every other core in its affinity mask; a thread that became
+/// runnable is added to them. Nothing happens when the status did not actually change.
+///
+/// @param thread    Thread whose `scheduling_state` already holds the new status.
+/// @param old_flags Scheduling status the thread had before the change.
+void GlobalScheduler::AdjustSchedulingOnStatus(Thread* thread, u32 old_flags) {
+    const auto new_flags = thread->scheduling_state;
+    if (new_flags == old_flags) {
+        return;
+    }
+    ASSERT(is_locked);
+
+    const u32 runnable = static_cast<u32>(ThreadSchedStatus::Runnable);
+    const bool has_core = thread->processor_id >= 0;
+    const u32 assigned_core = static_cast<u32>(thread->processor_id);
+    // True for every core the thread may run on other than the one it is assigned to.
+    const auto is_suggested_core = [thread, assigned_core](u32 core) {
+        return core != assigned_core && ((thread->affinity_mask >> core) & 1) != 0;
+    };
+
+    if (old_flags == runnable) {
+        // The thread was runnable and is now pausing/exiting: take it out of the queues.
+        if (has_core) {
+            Unschedule(thread->current_priority, assigned_core, thread);
+        }
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            if (is_suggested_core(core)) {
+                Unsuggest(thread->current_priority, core, thread);
+            }
+        }
+    } else if (new_flags == runnable) {
+        // The thread was stopped and is now runnable: put it into the queues.
+        if (has_core) {
+            Schedule(thread->current_priority, assigned_core, thread);
+        }
+        for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+            if (is_suggested_core(core)) {
+                Suggest(thread->current_priority, core, thread);
+            }
+        }
+    }
+
+    SetReselectionPending();
+}
+
+/// Moves a runnable `thread` from the queues of `old_priority` to those of its current priority.
+///
+/// Threads that are not runnable are ignored. When the thread is the current thread of the
+/// current scheduler it is re-inserted with SchedulePrepend, otherwise with Schedule.
+///
+/// @param thread       Thread whose `current_priority` already holds the new priority.
+/// @param old_priority Priority under which the thread is currently queued.
+void GlobalScheduler::AdjustSchedulingOnPriority(Thread* thread, u32 old_priority) {
+    const bool is_runnable =
+        thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
+    if (!is_runnable) {
+        return;
+    }
+    ASSERT(is_locked);
+
+    const bool has_core = thread->processor_id >= 0;
+    const u32 assigned_core = static_cast<u32>(thread->processor_id);
+    // True for every core the thread may run on other than the one it is assigned to.
+    const auto is_suggested_core = [thread, assigned_core](u32 core) {
+        return core != assigned_core && ((thread->affinity_mask >> core) & 1) != 0;
+    };
+
+    // Step 1: drop every entry that was queued under the old priority.
+    if (has_core) {
+        Unschedule(old_priority, assigned_core, thread);
+    }
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        if (is_suggested_core(core)) {
+            Unsuggest(old_priority, core, thread);
+        }
+    }
+
+    // Step 2: queue the thread again under its new priority.
+    if (has_core) {
+        const bool is_current_thread = thread == kernel.CurrentScheduler().GetCurrentThread();
+        if (is_current_thread) {
+            SchedulePrepend(thread->current_priority, assigned_core, thread);
+        } else {
+            Schedule(thread->current_priority, assigned_core, thread);
+        }
+    }
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        if (is_suggested_core(core)) {
+            Suggest(thread->current_priority, core, thread);
+        }
+    }
+
+    thread->IncrementYieldCount();
+    SetReselectionPending();
+}
+
+/// Rebuilds the queue entries of `thread` after its core and/or affinity mask changed.
+///
+/// Only runnable threads with a priority below THREADPRIO_COUNT are processed. Entries that
+/// belong to the old mask are removed first, then entries for the new mask are added.
+///
+/// @param thread            Thread whose `affinity_mask`/`processor_id` hold the new values.
+/// @param old_affinity_mask Affinity mask the thread had before the change.
+/// @param old_core          Core the thread was assigned to before the change.
+void GlobalScheduler::AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask,
+                                                 s32 old_core) {
+    const bool can_requeue =
+        thread->scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable) &&
+        thread->current_priority < THREADPRIO_COUNT;
+    if (!can_requeue) {
+        return;
+    }
+    ASSERT(is_locked);
+
+    // Remove the entries that were created for the previous mask and core.
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        if (((old_affinity_mask >> core) & 1) == 0) {
+            continue;
+        }
+        if (core == static_cast<u32>(old_core)) {
+            Unschedule(thread->current_priority, core, thread);
+        } else {
+            Unsuggest(thread->current_priority, core, thread);
+        }
+    }
+
+    // Add the entries for the current mask and core.
+    for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
+        if (((thread->affinity_mask >> core) & 1) == 0) {
+            continue;
+        }
+        if (core == static_cast<u32>(thread->processor_id)) {
+            Schedule(thread->current_priority, core, thread);
+        } else {
+            Suggest(thread->current_priority, core, thread);
+        }
+    }
+
+    thread->IncrementYieldCount();
+    SetReselectionPending();
+}
+
void GlobalScheduler::Shutdown() {
for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
scheduled_queue[core].clear();
@@ -359,10 +551,12 @@ void GlobalScheduler::Shutdown() {
void GlobalScheduler::Lock() {
Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
+ ASSERT(!current_thread.IsInvalid());
if (current_thread == current_owner) {
++scope_lock;
} else {
inner_lock.lock();
+ is_locked = true;
current_owner = current_thread;
ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
scope_lock = 1;
@@ -374,17 +568,18 @@ void GlobalScheduler::Unlock() {
ASSERT(scope_lock > 0);
return;
}
- for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
- SelectThread(i);
- }
+ u32 cores_pending_reschedule = SelectThreads();
+ Core::EmuThreadHandle leaving_thread = current_owner;
current_owner = Core::EmuThreadHandle::InvalidHandle();
scope_lock = 1;
+ is_locked = false;
inner_lock.unlock();
- // TODO(Blinkhawk): Setup the interrupts and change context on current core.
+ EnableInterruptAndSchedule(cores_pending_reschedule, leaving_thread);
}
-Scheduler::Scheduler(Core::System& system, std::size_t core_id)
- : system{system}, core_id{core_id} {}
+Scheduler::Scheduler(Core::System& system, std::size_t core_id) : system(system), core_id(core_id) {
+ // The switch fiber is created with OnSwitch and `this`; OnSwitch runs SwitchToCurrent().
+ switch_fiber = std::make_shared<Common::Fiber>(std::function<void(void*)>(OnSwitch), this);
+}
Scheduler::~Scheduler() = default;
@@ -393,56 +588,128 @@ bool Scheduler::HaveReadyThreads() const {
}
/// Returns the current thread of this scheduler, or the idle thread when there is none.
Thread* Scheduler::GetCurrentThread() const {
- return current_thread.get();
+ if (current_thread) {
+ return current_thread.get();
+ }
+ return idle_thread.get();
}
/// Returns the thread stored in `selected_thread` (may be null).
Thread* Scheduler::GetSelectedThread() const {
return selected_thread.get();
}
-void Scheduler::SelectThreads() {
- system.GlobalScheduler().SelectThread(core_id);
-}
-
/// Returns the tick count recorded in `last_context_switch_time`.
u64 Scheduler::GetLastContextSwitchTicks() const {
return last_context_switch_time;
}
/// Clears a pending interrupt on the current physical core, then performs a context switch
/// if one is pending. `guard` is locked here; on the switch path SwitchContext() unlocks it.
void Scheduler::TryDoContextSwitch() {
+ auto& phys_core = system.Kernel().CurrentPhysicalCore();
+ if (phys_core.IsInterrupted()) {
+ phys_core.ClearInterrupt();
+ }
+ guard.lock();
if (is_context_switch_pending) {
SwitchContext(); // releases `guard` itself
+ } else {
+ guard.unlock();
}
}
-void Scheduler::UnloadThread() {
- Thread* const previous_thread = GetCurrentThread();
- Process* const previous_process = system.Kernel().CurrentProcess();
+/// Runs the second stage of a context switch (SwitchContextStep2).
+/// NOTE(review): by its name this is presumably called when a thread first starts executing;
+/// confirm against the callers.
+void Scheduler::OnThreadStart() {
+ SwitchContextStep2();
+}
- UpdateLastContextSwitchTime(previous_thread, previous_process);
+/// Marks the current thread as not running, saves its CPU state and releases its
+/// `context_guard`. Does nothing when this scheduler has no current thread.
+void Scheduler::Unload() {
+    Thread* const outgoing = current_thread.get();
+    if (outgoing == nullptr) {
+        return;
+    }
+
+    outgoing->SetContinuousOnSVC(false);
+    outgoing->last_running_ticks = system.CoreTiming().GetCPUTicks();
+    outgoing->SetIsRunning(false);
+
+    // CPU state is only saved for non-HLE threads that have not exited.
+    const bool has_cpu_state = !outgoing->IsHLEThread() && !outgoing->HasExited();
+    if (has_cpu_state) {
+        Core::ARM_Interface& cpu_core = outgoing->ArmInterface();
+        cpu_core.SaveContext(outgoing->GetContext32());
+        cpu_core.SaveContext(outgoing->GetContext64());
+        // Save the TPIDR_EL0 system register in case it was modified.
+        outgoing->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+        cpu_core.ClearExclusiveState();
+    }
+
+    outgoing->context_guard.unlock();
+}
- // Save context for previous thread
- if (previous_thread) {
- system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
- system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
- // Save the TPIDR_EL0 system register in case it was modified.
- previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
+/// Marks the current thread as running again, makes its owner process current and, for
+/// non-HLE threads, loads its CPU state. Does nothing when there is no current thread.
+void Scheduler::Reload() {
+ Thread* thread = current_thread.get();
+ if (thread) {
+ ASSERT_MSG(thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
+ "Thread must be runnable.");
- if (previous_thread->GetStatus() == ThreadStatus::Running) {
- // This is only the case when a reschedule is triggered without the current thread
- // yielding execution (i.e. an event triggered, system core time-sliced, etc)
- previous_thread->SetStatus(ThreadStatus::Ready);
+ // Flag the thread as running and refresh its last-running timestamp
+ thread->SetIsRunning(true);
+ thread->SetWasRunning(false);
+ thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+
+ auto* const thread_owner_process = thread->GetOwnerProcess();
+ if (thread_owner_process != nullptr) {
+ system.Kernel().MakeCurrentProcess(thread_owner_process);
+ }
+ if (!thread->IsHLEThread()) {
+ Core::ARM_Interface& cpu_core = thread->ArmInterface();
+ cpu_core.LoadContext(thread->GetContext32());
+ cpu_core.LoadContext(thread->GetContext64());
+ cpu_core.SetTlsAddress(thread->GetTLSAddress());
+ cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
+ cpu_core.ChangeProcessorID(this->core_id);
+ cpu_core.ClearExclusiveState();
}
- previous_thread->SetIsRunning(false);
}
- current_thread = nullptr;
+}
+
+/// Second stage of a context switch: marks the selected thread as running, makes its owner
+/// process current, loads its CPU state (non-HLE threads only) and finally checks whether
+/// another context switch became pending in the meantime.
+void Scheduler::SwitchContextStep2() {
+    Thread* const new_thread = selected_thread.get();
+
+    if (new_thread != nullptr) {
+        ASSERT_MSG(new_thread->GetSchedulingStatus() == ThreadSchedStatus::Runnable,
+                   "Thread must be runnable.");
+
+        // Flag the thread as running and refresh its last-running timestamp.
+        new_thread->SetIsRunning(true);
+        new_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
+        new_thread->SetWasRunning(false);
+
+        // Query the thread that was null-checked above. The previous code dereferenced
+        // current_thread here, which is only safe while it aliases selected_thread.
+        auto* const thread_owner_process = new_thread->GetOwnerProcess();
+        if (thread_owner_process != nullptr) {
+            system.Kernel().MakeCurrentProcess(thread_owner_process);
+        }
+        if (!new_thread->IsHLEThread()) {
+            Core::ARM_Interface& cpu_core = new_thread->ArmInterface();
+            cpu_core.LoadContext(new_thread->GetContext32());
+            cpu_core.LoadContext(new_thread->GetContext64());
+            cpu_core.SetTlsAddress(new_thread->GetTLSAddress());
+            cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+            cpu_core.ChangeProcessorID(this->core_id);
+            cpu_core.ClearExclusiveState();
+        }
+    }
+
+    TryDoContextSwitch();
}
void Scheduler::SwitchContext() {
- Thread* const previous_thread = GetCurrentThread();
- Thread* const new_thread = GetSelectedThread();
+ current_thread_prev = current_thread;
+ selected_thread = selected_thread_set;
+ Thread* previous_thread = current_thread_prev.get();
+ Thread* new_thread = selected_thread.get();
+ current_thread = selected_thread;
is_context_switch_pending = false;
+
if (new_thread == previous_thread) {
+ guard.unlock();
return;
}
@@ -452,51 +719,75 @@ void Scheduler::SwitchContext() {
// Save context for previous thread
if (previous_thread) {
- system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
- system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
- // Save the TPIDR_EL0 system register in case it was modified.
- previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
-
- if (previous_thread->GetStatus() == ThreadStatus::Running) {
- // This is only the case when a reschedule is triggered without the current thread
- // yielding execution (i.e. an event triggered, system core time-sliced, etc)
- previous_thread->SetStatus(ThreadStatus::Ready);
+ if (new_thread != nullptr && new_thread->IsSuspendThread()) {
+ previous_thread->SetWasRunning(true);
}
+ previous_thread->SetContinuousOnSVC(false);
+ previous_thread->last_running_ticks = system.CoreTiming().GetCPUTicks();
previous_thread->SetIsRunning(false);
- }
-
- // Load context of new thread
- if (new_thread) {
- ASSERT_MSG(new_thread->GetProcessorID() == s32(this->core_id),
- "Thread must be assigned to this core.");
- ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready,
- "Thread must be ready to become running.");
-
- // Cancel any outstanding wakeup events for this thread
- new_thread->CancelWakeupTimer();
- current_thread = SharedFrom(new_thread);
- new_thread->SetStatus(ThreadStatus::Running);
- new_thread->SetIsRunning(true);
-
- auto* const thread_owner_process = current_thread->GetOwnerProcess();
- if (previous_process != thread_owner_process) {
- system.Kernel().MakeCurrentProcess(thread_owner_process);
+ if (!previous_thread->IsHLEThread() && !previous_thread->HasExited()) {
+ Core::ARM_Interface& cpu_core = previous_thread->ArmInterface();
+ cpu_core.SaveContext(previous_thread->GetContext32());
+ cpu_core.SaveContext(previous_thread->GetContext64());
+ // Save the TPIDR_EL0 system register in case it was modified.
+ previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0());
+ cpu_core.ClearExclusiveState();
}
+ previous_thread->context_guard.unlock();
+ }
- system.ArmInterface(core_id).LoadContext(new_thread->GetContext32());
- system.ArmInterface(core_id).LoadContext(new_thread->GetContext64());
- system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress());
- system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
+ std::shared_ptr<Common::Fiber>* old_context;
+ if (previous_thread != nullptr) {
+ old_context = &previous_thread->GetHostContext();
} else {
- current_thread = nullptr;
- // Note: We do not reset the current process and current page table when idling because
- // technically we haven't changed processes, our threads are just paused.
+ old_context = &idle_thread->GetHostContext();
+ }
+ guard.unlock();
+
+ Common::Fiber::YieldTo(*old_context, switch_fiber);
+ /// When a thread wakes up, it may be running under a different scheduler on another core.
+ auto& next_scheduler = system.Kernel().CurrentScheduler();
+ next_scheduler.SwitchContextStep2();
+}
+
+/// Entry point of the switch fiber.
+/// @param this_scheduler Opaque pointer to the Scheduler that owns the fiber.
+void Scheduler::OnSwitch(void* this_scheduler) {
+    static_cast<Scheduler*>(this_scheduler)->SwitchToCurrent();
+}
+
+/// Loop executed on the switch fiber: adopts the latest selected thread under `guard`, then
+/// keeps yielding to that thread's host context (or the idle thread's when none is selected)
+/// until a context switch becomes pending or the thread can no longer run on this core.
+void Scheduler::SwitchToCurrent() {
+ while (true) {
+ guard.lock();
+ selected_thread = selected_thread_set;
+ current_thread = selected_thread;
+ is_context_switch_pending = false;
+ guard.unlock();
+ while (!is_context_switch_pending) {
+ if (current_thread != nullptr && !current_thread->IsHLEThread()) {
+ current_thread->context_guard.lock(); // stays locked while yielding to the thread
+ if (!current_thread->IsRunnable()) {
+ current_thread->context_guard.unlock();
+ break; // back to the outer loop to pick up a new selection
+ }
+ if (current_thread->GetProcessorID() != core_id) {
+ current_thread->context_guard.unlock();
+ break; // thread is assigned to another core
+ }
+ }
+ std::shared_ptr<Common::Fiber>* next_context;
+ if (current_thread != nullptr) {
+ next_context = &current_thread->GetHostContext();
+ } else {
+ next_context = &idle_thread->GetHostContext();
+ }
+ Common::Fiber::YieldTo(switch_fiber, *next_context);
+ }
}
}
void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
const u64 prev_switch_ticks = last_context_switch_time;
- const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
+ const u64 most_recent_switch_ticks = system.CoreTiming().GetCPUTicks();
const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
if (thread != nullptr) {
@@ -510,6 +801,16 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
last_context_switch_time = most_recent_switch_ticks;
}
+/// Creates the idle thread of this scheduler, using the idle start function and start
+/// parameter supplied by the CPU manager.
+void Scheduler::Initialize() {
+    auto&& cpu_manager = system.GetCpuManager();
+    std::function<void(void*)> idle_entry = cpu_manager.GetIdleThreadStartFunc();
+    void* const idle_entry_parameter = cpu_manager.GetStartFuncParamater();
+
+    const auto idle_type =
+        static_cast<ThreadType>(THREADTYPE_KERNEL | THREADTYPE_HLE | THREADTYPE_IDLE);
+    const std::string idle_name = "Idle Thread Id:" + std::to_string(core_id);
+
+    idle_thread = Thread::Create(system, idle_type, idle_name, 0, 64, 0,
+                                 static_cast<u32>(core_id), 0, nullptr, std::move(idle_entry),
+                                 idle_entry_parameter)
+                      .Unwrap();
+}
+
void Scheduler::Shutdown() {
current_thread = nullptr;
selected_thread = nullptr;
@@ -538,4 +839,13 @@ SchedulerLockAndSleep::~SchedulerLockAndSleep() {
time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
}
+/// Schedules the time event immediately unless the sleep was cancelled, then marks the sleep
+/// as cancelled.
+void SchedulerLockAndSleep::Release() {
+    if (!sleep_cancelled) {
+        kernel.TimeManager().ScheduleTimeEvent(event_handle, time_task, nanoseconds);
+        sleep_cancelled = true;
+    }
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 07df33f9c..b3b4b5169 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -11,9 +11,14 @@
#include "common/common_types.h"
#include "common/multi_level_queue.h"
+#include "common/spin_lock.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/thread.h"
+namespace Common {
+class Fiber;
+}
+
namespace Core {
class ARM_Interface;
class System;
@@ -41,41 +46,17 @@ public:
return thread_list;
}
- /**
- * Add a thread to the suggested queue of a cpu core. Suggested threads may be
- * picked if no thread is scheduled to run on the core.
- */
- void Suggest(u32 priority, std::size_t core, Thread* thread);
-
- /**
- * Remove a thread to the suggested queue of a cpu core. Suggested threads may be
- * picked if no thread is scheduled to run on the core.
- */
- void Unsuggest(u32 priority, std::size_t core, Thread* thread);
-
- /**
- * Add a thread to the scheduling queue of a cpu core. The thread is added at the
- * back the queue in its priority level.
- */
- void Schedule(u32 priority, std::size_t core, Thread* thread);
-
- /**
- * Add a thread to the scheduling queue of a cpu core. The thread is added at the
- * front the queue in its priority level.
- */
- void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
+ /// Notify the scheduler a thread's status has changed.
+ void AdjustSchedulingOnStatus(Thread* thread, u32 old_flags);
- /// Reschedule an already scheduled thread based on a new priority
- void Reschedule(u32 priority, std::size_t core, Thread* thread);
-
- /// Unschedules a thread.
- void Unschedule(u32 priority, std::size_t core, Thread* thread);
+ /// Notify the scheduler a thread's priority has changed.
+ void AdjustSchedulingOnPriority(Thread* thread, u32 old_priority);
- /// Selects a core and forces it to unload its current thread's context
- void UnloadThread(std::size_t core);
+ /// Notify the scheduler a thread's core and/or affinity mask has changed.
+ void AdjustSchedulingOnAffinity(Thread* thread, u64 old_affinity_mask, s32 old_core);
/**
- * Takes care of selecting the new scheduled thread in three steps:
+ * Takes care of selecting the new scheduled threads in three steps:
*
* 1. First a thread is selected from the top of the priority queue. If no thread
* is obtained then we move to step two, else we are done.
@@ -85,8 +66,10 @@ public:
*
* 3. Third, if no suggested thread is found, we do a second pass and pick a running
* thread in another core and swap it with its current thread.
+ *
+ * returns the cores needing scheduling.
*/
- void SelectThread(std::size_t core);
+ u32 SelectThreads();
bool HaveReadyThreads(std::size_t core_id) const {
return !scheduled_queue[core_id].empty();
@@ -149,6 +132,40 @@ private:
/// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling
/// and reschedules current core if needed.
void Unlock();
+
+ void EnableInterruptAndSchedule(u32 cores_pending_reschedule,
+ Core::EmuThreadHandle global_thread);
+
+ /**
+ * Add a thread to the suggested queue of a cpu core. Suggested threads may be
+ * picked if no thread is scheduled to run on the core.
+ */
+ void Suggest(u32 priority, std::size_t core, Thread* thread);
+
+ /**
+ * Remove a thread from the suggested queue of a cpu core. Suggested threads may be
+ * picked if no thread is scheduled to run on the core.
+ */
+ void Unsuggest(u32 priority, std::size_t core, Thread* thread);
+
+ /**
+ * Add a thread to the scheduling queue of a cpu core. The thread is added at the
+ * back of the queue in its priority level.
+ */
+ void Schedule(u32 priority, std::size_t core, Thread* thread);
+
+ /**
+ * Add a thread to the scheduling queue of a cpu core. The thread is added at the
+ * front of the queue in its priority level.
+ */
+ void SchedulePrepend(u32 priority, std::size_t core, Thread* thread);
+
+ /// Reschedule an already scheduled thread based on a new priority
+ void Reschedule(u32 priority, std::size_t core, Thread* thread);
+
+ /// Unschedules a thread.
+ void Unschedule(u32 priority, std::size_t core, Thread* thread);
+
/**
* Transfers a thread into a specific core. If the destination_core is -1
* it will be unscheduled from its source core and added into its suggested
@@ -170,10 +187,13 @@ private:
std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
/// Scheduler lock mechanisms.
- std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock
+ bool is_locked{};
+ Common::SpinLock inner_lock{};
std::atomic<s64> scope_lock{};
Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
+ Common::SpinLock global_list_guard{};
+
/// Lists all thread ids that aren't deleted/etc.
std::vector<std::shared_ptr<Thread>> thread_list;
KernelCore& kernel;
@@ -190,11 +210,11 @@ public:
/// Reschedules to the next available thread (call after current thread is suspended)
void TryDoContextSwitch();
- /// Unloads currently running thread
- void UnloadThread();
-
- /// Select the threads in top of the scheduling multilist.
- void SelectThreads();
+ /// The next two are for SingleCore Only.
+ /// Unload current thread before preempting core.
+ void Unload();
+ /// Reload current thread after core preemption.
+ void Reload();
/// Gets the current running thread
Thread* GetCurrentThread() const;
@@ -209,15 +229,30 @@ public:
return is_context_switch_pending;
}
+ void Initialize();
+
+ /// Shuts down the scheduler.
void Shutdown();
+ void OnThreadStart();
+
+ /// Returns this scheduler's switch fiber (mutable access).
+ std::shared_ptr<Common::Fiber>& ControlContext() {
+ return switch_fiber;
+ }
+
+ /// Returns this scheduler's switch fiber (read-only access).
+ const std::shared_ptr<Common::Fiber>& ControlContext() const {
+ return switch_fiber;
+ }
+
private:
friend class GlobalScheduler;
/// Switches the CPU's active thread context to that of the specified thread
void SwitchContext();
+ /// When a thread wakes up, it must run this through its new scheduler
+ void SwitchContextStep2();
+
/**
* Called on every context switch to update the internal timestamp
* This also updates the running time ticks for the given thread and
@@ -231,14 +266,24 @@ private:
*/
void UpdateLastContextSwitchTime(Thread* thread, Process* process);
+ static void OnSwitch(void* this_scheduler);
+ void SwitchToCurrent();
+
std::shared_ptr<Thread> current_thread = nullptr;
std::shared_ptr<Thread> selected_thread = nullptr;
+ std::shared_ptr<Thread> current_thread_prev = nullptr;
+ std::shared_ptr<Thread> selected_thread_set = nullptr;
+ std::shared_ptr<Thread> idle_thread = nullptr;
+
+ std::shared_ptr<Common::Fiber> switch_fiber = nullptr;
Core::System& system;
u64 last_context_switch_time = 0;
u64 idle_selection_count = 0;
const std::size_t core_id;
+ Common::SpinLock guard{};
+
bool is_context_switch_pending = false;
};
@@ -261,6 +306,8 @@ public:
sleep_cancelled = true;
}
+ void Release();
+
private:
Handle& event_handle;
Thread* time_task;
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index 0f102ca44..7b23a6889 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -17,6 +17,7 @@
#include "core/hle/kernel/hle_ipc.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/server_session.h"
#include "core/hle/kernel/session.h"
#include "core/hle/kernel/thread.h"
@@ -137,8 +138,8 @@ ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& con
ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread,
Core::Memory::Memory& memory) {
u32* cmd_buf{reinterpret_cast<u32*>(memory.GetPointer(thread->GetTLSAddress()))};
- std::shared_ptr<Kernel::HLERequestContext> context{
- std::make_shared<Kernel::HLERequestContext>(SharedFrom(this), std::move(thread))};
+ auto context =
+ std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
request_queue.Push(std::move(context));
@@ -168,9 +169,12 @@ ResultCode ServerSession::CompleteSyncRequest() {
}
// Some service requests require the thread to block
- if (!context.IsThreadWaiting()) {
- context.GetThread().ResumeFromWait();
- context.GetThread().SetWaitSynchronizationResult(result);
+ {
+ SchedulerLock lock(kernel);
+ if (!context.IsThreadWaiting()) {
+ context.GetThread().ResumeFromWait();
+ context.GetThread().SetSynchronizationResults(nullptr, result);
+ }
}
request_queue.Pop();
@@ -180,8 +184,10 @@ ResultCode ServerSession::CompleteSyncRequest() {
/// Queues the sync request of `thread`, then schedules `request_event` and returns the result
/// of the queueing step.
ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
Core::Memory::Memory& memory) {
- Core::System::GetInstance().CoreTiming().ScheduleEvent(20000, request_event, {});
- return QueueSyncRequest(std::move(thread), memory);
+ ResultCode result = QueueSyncRequest(std::move(thread), memory);
+ const u64 delay = kernel.IsMulticore() ? 0U : 20000U; // no delay when running multicore
+ Core::System::GetInstance().CoreTiming().ScheduleEvent(delay, request_event, {});
+ return result;
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4ae4529f5..5db19dcf3 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -10,14 +10,15 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/fiber.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/string_util.h"
#include "core/arm/exclusive_monitor.h"
#include "core/core.h"
-#include "core/core_manager.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
@@ -27,6 +28,7 @@
#include "core/hle/kernel/memory/memory_block.h"
#include "core/hle/kernel/memory/page_table.h"
#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/physical_core.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/resource_limit.h"
@@ -37,6 +39,7 @@
#include "core/hle/kernel/svc_wrap.h"
#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
#include "core/hle/kernel/transfer_memory.h"
#include "core/hle/kernel/writable_event.h"
#include "core/hle/lock.h"
@@ -133,6 +136,7 @@ enum class ResourceLimitValueType {
ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit,
u32 resource_type, ResourceLimitValueType value_type) {
+ std::lock_guard lock{HLE::g_hle_lock};
const auto type = static_cast<ResourceType>(resource_type);
if (!IsValidResourceType(type)) {
LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type);
@@ -160,6 +164,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_
/// Set the process heap to a given Size. It can both extend and shrink the heap.
static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
// Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
@@ -190,6 +195,7 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s
static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
u32 attribute) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_DEBUG(Kernel_SVC,
"called, address=0x{:016X}, size=0x{:X}, mask=0x{:08X}, attribute=0x{:08X}", address,
size, mask, attribute);
@@ -226,8 +232,15 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si
static_cast<Memory::MemoryAttribute>(attribute));
}
+/// 32-bit variant of SetMemoryAttribute: widens `address` and `size` to the parameter types
+/// declared by SetMemoryAttribute (VAddr and u64) and forwards the call.
+static ResultCode SetMemoryAttribute32(Core::System& system, u32 address, u32 size, u32 mask,
+                                       u32 attribute) {
+    return SetMemoryAttribute(system, static_cast<VAddr>(address), static_cast<u64>(size), mask,
+                              attribute);
+}
+
/// Maps a memory range into a different range.
static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
@@ -241,8 +254,14 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
return page_table.Map(dst_addr, src_addr, size);
}
+/// 32-bit variant of MapMemory: widens the addresses and `size` to the parameter types
+/// declared by MapMemory (VAddr and u64) and forwards the call.
+static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
+    return MapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr),
+                     static_cast<u64>(size));
+}
+
/// Unmaps a region that was previously mapped with svcMapMemory
static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr, u64 size) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
@@ -256,9 +275,15 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
return page_table.Unmap(dst_addr, src_addr, size);
}
+/// 32-bit variant of UnmapMemory: widens the addresses and `size` to the parameter types
+/// declared by UnmapMemory (VAddr and u64) and forwards the call.
+static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
+    return UnmapMemory(system, static_cast<VAddr>(dst_addr), static_cast<VAddr>(src_addr),
+                       static_cast<u64>(size));
+}
+
/// Connect to an OS service given the port name, returns the handle to the port to out
static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
VAddr port_name_address) {
+ std::lock_guard lock{HLE::g_hle_lock};
auto& memory = system.Memory();
if (!memory.IsValidVirtualAddress(port_name_address)) {
@@ -317,11 +342,30 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName());
auto thread = system.CurrentScheduler().GetCurrentThread();
- thread->InvalidateWakeupCallback();
- thread->SetStatus(ThreadStatus::WaitIPC);
- system.PrepareReschedule(thread->GetProcessorID());
+ {
+ SchedulerLock lock(system.Kernel());
+ thread->InvalidateHLECallback();
+ thread->SetStatus(ThreadStatus::WaitIPC);
+ session->SendSyncRequest(SharedFrom(thread), system.Memory());
+ }
+
+ if (thread->HasHLECallback()) {
+ Handle event_handle = thread->GetHLETimeEvent();
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = system.Kernel().TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
+ }
+
+ {
+ SchedulerLock lock(system.Kernel());
+ auto* sync_object = thread->GetHLESyncObject();
+ sync_object->RemoveWaitingThread(SharedFrom(thread));
+ }
+
+ thread->InvokeHLECallback(SharedFrom(thread));
+ }
- return session->SendSyncRequest(SharedFrom(thread), system.Memory());
+ return thread->GetSignalingResult();
}
static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
@@ -383,6 +427,15 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han
return ERR_INVALID_HANDLE;
}
+/// 32-bit wrapper for GetProcessId: runs the 64-bit implementation and hands the id back as
+/// two 32-bit halves.
+static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high,
+                                 Handle handle) {
+    u64 full_id = 0;
+    const ResultCode status = GetProcessId(system, &full_id, handle);
+
+    // Both halves are written regardless of the result code; full_id keeps its initial 0
+    // unless GetProcessId stored a value.
+    *process_id_high = static_cast<u32>(full_id >> 32);
+    *process_id_low = static_cast<u32>(full_id & 0xFFFFFFFFULL);
+    return status;
+}
+
/// Wait for the given handles to synchronize, timeout after the specified nanoseconds
static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr handles_address,
u64 handle_count, s64 nano_seconds) {
@@ -447,10 +500,13 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand
}
thread->CancelWait();
- system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for CancelSynchronization; forwards the thread handle unchanged and returns
+/// the result of the 64-bit implementation.
+static ResultCode CancelSynchronization32(Core::System& system, Handle thread_handle) {
+ return CancelSynchronization(system, thread_handle);
+}
+
/// Attempts to locks a mutex, creating it if it does not already exist
static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_handle,
VAddr mutex_addr, Handle requesting_thread_handle) {
@@ -475,6 +531,12 @@ static ResultCode ArbitrateLock(Core::System& system, Handle holding_thread_hand
requesting_thread_handle);
}
+/// 32-bit wrapper for ArbitrateLock: converts the mutex address to VAddr and forwards both
+/// thread handles unchanged.
+static ResultCode ArbitrateLock32(Core::System& system, Handle holding_thread_handle,
+                                  u32 mutex_addr, Handle requesting_thread_handle) {
+    const auto wide_mutex_addr = static_cast<VAddr>(mutex_addr);
+    return ArbitrateLock(system, holding_thread_handle, wide_mutex_addr,
+                         requesting_thread_handle);
+}
+
/// Unlock a mutex
static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
LOG_TRACE(Kernel_SVC, "called mutex_addr=0x{:X}", mutex_addr);
@@ -494,6 +556,10 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr mutex_addr) {
return current_process->GetMutex().Release(mutex_addr);
}
+/// 32-bit wrapper for ArbitrateUnlock: converts the mutex address to VAddr and forwards it.
+static ResultCode ArbitrateUnlock32(Core::System& system, u32 mutex_addr) {
+    const auto wide_mutex_addr = static_cast<VAddr>(mutex_addr);
+    return ArbitrateUnlock(system, wide_mutex_addr);
+}
+
enum class BreakType : u32 {
Panic = 0,
AssertionFailed = 1,
@@ -594,6 +660,7 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
info2, has_dumped_buffer ? std::make_optional(debug_buffer) : std::nullopt);
if (!break_reason.signal_debugger) {
+ SchedulerLock lock(system.Kernel());
LOG_CRITICAL(
Debug_Emulated,
"Emulated program broke execution! reason=0x{:016X}, info1=0x{:016X}, info2=0x{:016X}",
@@ -605,14 +672,16 @@ static void Break(Core::System& system, u32 reason, u64 info1, u64 info2) {
const auto thread_processor_id = current_thread->GetProcessorID();
system.ArmInterface(static_cast<std::size_t>(thread_processor_id)).LogBacktrace();
- system.Kernel().CurrentProcess()->PrepareForTermination();
-
// Kill the current thread
+ system.Kernel().ExceptionalExit();
current_thread->Stop();
- system.PrepareReschedule();
}
}
+/// 32-bit wrapper for Break: widens both info words to u64 and forwards the reason as-is.
+static void Break32(Core::System& system, u32 reason, u32 info1, u32 info2) {
+    const u64 wide_info1{info1};
+    const u64 wide_info2{info2};
+    Break(system, reason, wide_info1, wide_info2);
+}
+
/// Used to output a message on a debug hardware unit - does nothing on a retail unit
static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr address, u64 len) {
if (len == 0) {
@@ -627,6 +696,7 @@ static void OutputDebugString([[maybe_unused]] Core::System& system, VAddr addre
/// Gets system/memory information for the current process
static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 handle,
u64 info_sub_id) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC, "called info_id=0x{:X}, info_sub_id=0x{:X}, handle=0x{:08X}", info_id,
info_sub_id, handle);
@@ -863,9 +933,9 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) {
const u64 thread_ticks = current_thread->GetTotalCPUTimeTicks();
- out_ticks = thread_ticks + (core_timing.GetTicks() - prev_ctx_ticks);
+ out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks);
} else if (same_thread && info_sub_id == system.CurrentCoreIndex()) {
- out_ticks = core_timing.GetTicks() - prev_ctx_ticks;
+ out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks;
}
*result = out_ticks;
@@ -892,6 +962,7 @@ static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_h
/// Maps memory at a desired address
static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
if (!Common::Is4KBAligned(addr)) {
@@ -939,8 +1010,13 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size)
return page_table.MapPhysicalMemory(addr, size);
}
+/// 32-bit wrapper for MapPhysicalMemory: converts the address to VAddr and the size to u64,
+/// then returns whatever MapPhysicalMemory returns.
+static ResultCode MapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
+    // MapPhysicalMemory declares its size parameter as u64, so widen to exactly that type
+    // instead of the host-dependent std::size_t.
+    return MapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<u64>(size));
+}
+
/// Unmaps memory previously mapped via MapPhysicalMemory
static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
if (!Common::Is4KBAligned(addr)) {
@@ -988,6 +1064,10 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size
return page_table.UnmapPhysicalMemory(addr, size);
}
+/// 32-bit wrapper for UnmapPhysicalMemory: converts the address to VAddr and the size to u64,
+/// then returns whatever UnmapPhysicalMemory returns.
+static ResultCode UnmapPhysicalMemory32(Core::System& system, u32 addr, u32 size) {
+    // UnmapPhysicalMemory declares its size parameter as u64, so widen to exactly that type
+    // instead of the host-dependent std::size_t.
+    return UnmapPhysicalMemory(system, static_cast<VAddr>(addr), static_cast<u64>(size));
+}
+
/// Sets the thread activity
static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
@@ -1017,10 +1097,11 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act
return ERR_BUSY;
}
- thread->SetActivity(static_cast<ThreadActivity>(activity));
+ return thread->SetActivity(static_cast<ThreadActivity>(activity));
+}
- system.PrepareReschedule(thread->GetProcessorID());
- return RESULT_SUCCESS;
+/// 32-bit wrapper for SetThreadActivity; forwards the handle and activity value unchanged and
+/// returns the result of the 64-bit implementation.
+static ResultCode SetThreadActivity32(Core::System& system, Handle handle, u32 activity) {
+ return SetThreadActivity(system, handle, activity);
}
/// Gets the thread context
@@ -1064,6 +1145,10 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for GetThreadContext: converts the context address to VAddr and forwards
+/// the handle unchanged.
+static ResultCode GetThreadContext32(Core::System& system, u32 thread_context, Handle handle) {
+    const auto wide_context_addr = static_cast<VAddr>(thread_context);
+    return GetThreadContext(system, wide_context_addr, handle);
+}
+
/// Gets the priority for the specified thread
static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle handle) {
LOG_TRACE(Kernel_SVC, "called");
@@ -1071,6 +1156,7 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
const std::shared_ptr<Thread> thread = handle_table.Get<Thread>(handle);
if (!thread) {
+ *priority = 0;
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", handle);
return ERR_INVALID_HANDLE;
}
@@ -1105,18 +1191,26 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri
thread->SetPriority(priority);
- system.PrepareReschedule(thread->GetProcessorID());
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for SetThreadPriority; forwards the handle and priority unchanged and
+/// returns the result of the 64-bit implementation.
+static ResultCode SetThreadPriority32(Core::System& system, Handle handle, u32 priority) {
+ return SetThreadPriority(system, handle, priority);
+}
+
/// Get which CPU core is executing the current thread
static u32 GetCurrentProcessorNumber(Core::System& system) {
LOG_TRACE(Kernel_SVC, "called");
- return system.CurrentScheduler().GetCurrentThread()->GetProcessorID();
+ return static_cast<u32>(system.CurrentPhysicalCore().CoreIndex());
+}
+
+/// 32-bit wrapper for GetCurrentProcessorNumber; returns its value unchanged.
+static u32 GetCurrentProcessorNumber32(Core::System& system) {
+ return GetCurrentProcessorNumber(system);
}
static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_handle, VAddr addr,
u64 size, u32 permissions) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC,
"called, shared_memory_handle=0x{:X}, addr=0x{:X}, size=0x{:X}, permissions=0x{:08X}",
shared_memory_handle, addr, size, permissions);
@@ -1187,9 +1281,16 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han
return shared_memory->Map(*current_process, addr, size, permission_type);
}
+/// 32-bit wrapper for MapSharedMemory: converts the address to VAddr and the size to u64;
+/// the handle and permissions are forwarded unchanged.
+static ResultCode MapSharedMemory32(Core::System& system, Handle shared_memory_handle, u32 addr,
+                                    u32 size, u32 permissions) {
+    // MapSharedMemory declares its size parameter as u64, so widen to exactly that type
+    // instead of the host-dependent std::size_t.
+    return MapSharedMemory(system, shared_memory_handle, static_cast<VAddr>(addr),
+                           static_cast<u64>(size), permissions);
+}
+
static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_address,
VAddr page_info_address, Handle process_handle,
VAddr address) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_TRACE(Kernel_SVC, "called process=0x{:08X} address={:X}", process_handle, address);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
std::shared_ptr<Process> process = handle_table.Get<Process>(process_handle);
@@ -1372,6 +1473,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha
/// Exits the current process
static void ExitProcess(Core::System& system) {
auto* current_process = system.Kernel().CurrentProcess();
+ UNIMPLEMENTED();
LOG_INFO(Kernel_SVC, "Process {} exiting", current_process->GetProcessID());
ASSERT_MSG(current_process->GetStatus() == ProcessStatus::Running,
@@ -1381,8 +1483,10 @@ static void ExitProcess(Core::System& system) {
// Kill the current thread
system.CurrentScheduler().GetCurrentThread()->Stop();
+}
- system.PrepareReschedule();
+/// 32-bit wrapper for ExitProcess; simply calls the 64-bit implementation.
+static void ExitProcess32(Core::System& system) {
+ ExitProcess(system);
}
/// Creates a new thread
@@ -1428,9 +1532,10 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(ResourceType::Threads, 1));
+ ThreadType type = THREADTYPE_USER;
CASCADE_RESULT(std::shared_ptr<Thread> thread,
- Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top,
- *current_process));
+ Thread::Create(system, type, "", entry_point, priority, arg, processor_id,
+ stack_top, current_process));
const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
if (new_thread_handle.Failed()) {
@@ -1444,11 +1549,15 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
thread->SetName(
fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
- system.PrepareReschedule(thread->GetProcessorID());
-
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for CreateThread. The parameters arrive here with priority first, and are
+/// re-ordered for the CreateThread call (entry point, argument, stack top, priority, core).
+static ResultCode CreateThread32(Core::System& system, Handle* out_handle, u32 priority,
+                                 u32 entry_point, u32 arg, u32 stack_top, s32 processor_id) {
+    const auto wide_entry_point = static_cast<VAddr>(entry_point);
+    const auto wide_arg = static_cast<u64>(arg);
+    const auto wide_stack_top = static_cast<VAddr>(stack_top);
+    return CreateThread(system, out_handle, wide_entry_point, wide_arg, wide_stack_top, priority,
+                        processor_id);
+}
+
/// Starts the thread for the provided handle
static ResultCode StartThread(Core::System& system, Handle thread_handle) {
LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
@@ -1463,13 +1572,11 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) {
ASSERT(thread->GetStatus() == ThreadStatus::Dormant);
- thread->ResumeFromWait();
-
- if (thread->GetStatus() == ThreadStatus::Ready) {
- system.PrepareReschedule(thread->GetProcessorID());
- }
+ return thread->Start();
+}
- return RESULT_SUCCESS;
+/// 32-bit wrapper for StartThread; forwards the thread handle unchanged and returns the
+/// result of the 64-bit implementation.
+static ResultCode StartThread32(Core::System& system, Handle thread_handle) {
+ return StartThread(system, thread_handle);
}
/// Called when a thread exits
@@ -1477,9 +1584,12 @@ static void ExitThread(Core::System& system) {
LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC());
auto* const current_thread = system.CurrentScheduler().GetCurrentThread();
- current_thread->Stop();
system.GlobalScheduler().RemoveThread(SharedFrom(current_thread));
- system.PrepareReschedule();
+ current_thread->Stop();
+}
+
+/// 32-bit wrapper for ExitThread; simply calls the 64-bit implementation.
+static void ExitThread32(Core::System& system) {
+ ExitThread(system);
}
/// Sleep the current thread
@@ -1498,15 +1608,21 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
if (nanoseconds <= 0) {
switch (static_cast<SleepType>(nanoseconds)) {
- case SleepType::YieldWithoutLoadBalancing:
- is_redundant = current_thread->YieldSimple();
+ case SleepType::YieldWithoutLoadBalancing: {
+ auto pair = current_thread->YieldSimple();
+ is_redundant = pair.second;
break;
- case SleepType::YieldWithLoadBalancing:
- is_redundant = current_thread->YieldAndBalanceLoad();
+ }
+ case SleepType::YieldWithLoadBalancing: {
+ auto pair = current_thread->YieldAndBalanceLoad();
+ is_redundant = pair.second;
break;
- case SleepType::YieldAndWaitForLoadBalancing:
- is_redundant = current_thread->YieldAndWaitForLoadBalancing();
+ }
+ case SleepType::YieldAndWaitForLoadBalancing: {
+ auto pair = current_thread->YieldAndWaitForLoadBalancing();
+ is_redundant = pair.second;
break;
+ }
default:
UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
}
@@ -1514,13 +1630,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) {
current_thread->Sleep(nanoseconds);
}
- if (is_redundant) {
- // If it's redundant, the core is pretty much idle. Some games keep idling
- // a core while it's doing nothing, we advance timing to avoid costly continuous
- // calls.
- system.CoreTiming().AddTicks(2000);
+ if (is_redundant && !system.Kernel().IsMulticore()) {
+ system.Kernel().ExitSVCProfile();
+ system.CoreTiming().AddTicks(1000U);
+ system.GetCpuManager().PreemptSingleCore();
+ system.Kernel().EnterSVCProfile();
}
- system.PrepareReschedule(current_thread->GetProcessorID());
+}
+
+/// 32-bit wrapper for SleepThread: joins the low and high 32-bit words into one signed
+/// 64-bit value and passes it to SleepThread.
+static void SleepThread32(Core::System& system, u32 nanoseconds_low, u32 nanoseconds_high) {
+    const u64 upper_bits = static_cast<u64>(nanoseconds_high) << 32;
+    const u64 joined = upper_bits | static_cast<u64>(nanoseconds_low);
+    SleepThread(system, static_cast<s64>(joined));
}
/// Wait process wide key atomic
@@ -1547,31 +1668,69 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add
}
ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
-
+ auto& kernel = system.Kernel();
+ Handle event_handle;
+ Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
auto* const current_process = system.Kernel().CurrentProcess();
- const auto& handle_table = current_process->GetHandleTable();
- std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
- ASSERT(thread);
+ {
+ SchedulerLockAndSleep lock(kernel, event_handle, current_thread, nano_seconds);
+ const auto& handle_table = current_process->GetHandleTable();
+ std::shared_ptr<Thread> thread = handle_table.Get<Thread>(thread_handle);
+ ASSERT(thread);
+
+ current_thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+
+ if (thread->IsPendingTermination()) {
+ lock.CancelSleep();
+ return ERR_THREAD_TERMINATING;
+ }
+
+ const auto release_result = current_process->GetMutex().Release(mutex_addr);
+ if (release_result.IsError()) {
+ lock.CancelSleep();
+ return release_result;
+ }
+
+ if (nano_seconds == 0) {
+ lock.CancelSleep();
+ return RESULT_TIMEOUT;
+ }
- const auto release_result = current_process->GetMutex().Release(mutex_addr);
- if (release_result.IsError()) {
- return release_result;
+ current_thread->SetCondVarWaitAddress(condition_variable_addr);
+ current_thread->SetMutexWaitAddress(mutex_addr);
+ current_thread->SetWaitHandle(thread_handle);
+ current_thread->SetStatus(ThreadStatus::WaitCondVar);
+ current_process->InsertConditionVariableThread(SharedFrom(current_thread));
}
- Thread* current_thread = system.CurrentScheduler().GetCurrentThread();
- current_thread->SetCondVarWaitAddress(condition_variable_addr);
- current_thread->SetMutexWaitAddress(mutex_addr);
- current_thread->SetWaitHandle(thread_handle);
- current_thread->SetStatus(ThreadStatus::WaitCondVar);
- current_thread->InvalidateWakeupCallback();
- current_process->InsertConditionVariableThread(SharedFrom(current_thread));
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = kernel.TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
+ }
+
+ {
+ SchedulerLock lock(kernel);
- current_thread->WakeAfterDelay(nano_seconds);
+ auto* owner = current_thread->GetLockOwner();
+ if (owner != nullptr) {
+ owner->RemoveMutexWaiter(SharedFrom(current_thread));
+ }
+ current_process->RemoveConditionVariableThread(SharedFrom(current_thread));
+ }
// Note: Deliberately don't attempt to inherit the lock owner's priority.
- system.PrepareReschedule(current_thread->GetProcessorID());
- return RESULT_SUCCESS;
+ return current_thread->GetSignalingResult();
+}
+
+/// 32-bit wrapper for WaitProcessWideKeyAtomic: joins the two timeout words into one signed
+/// 64-bit value and converts both addresses to VAddr.
+static ResultCode WaitProcessWideKeyAtomic32(Core::System& system, u32 mutex_addr,
+                                             u32 condition_variable_addr, Handle thread_handle,
+                                             u32 nanoseconds_low, u32 nanoseconds_high) {
+    const u64 upper_bits = static_cast<u64>(nanoseconds_high) << 32;
+    const u64 joined = upper_bits | static_cast<u64>(nanoseconds_low);
+    const auto wide_mutex_addr = static_cast<VAddr>(mutex_addr);
+    const auto wide_cv_addr = static_cast<VAddr>(condition_variable_addr);
+    return WaitProcessWideKeyAtomic(system, wide_mutex_addr, wide_cv_addr, thread_handle,
+                                    static_cast<s64>(joined));
}
/// Signal process wide key
@@ -1582,7 +1741,9 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4));
// Retrieve a list of all threads that are waiting for this condition variable.
- auto* const current_process = system.Kernel().CurrentProcess();
+ auto& kernel = system.Kernel();
+ SchedulerLock lock(kernel);
+ auto* const current_process = kernel.CurrentProcess();
std::vector<std::shared_ptr<Thread>> waiting_threads =
current_process->GetConditionVariableThreads(condition_variable_addr);
@@ -1591,7 +1752,7 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
std::size_t last = waiting_threads.size();
if (target > 0)
last = std::min(waiting_threads.size(), static_cast<std::size_t>(target));
-
+ auto& time_manager = kernel.TimeManager();
for (std::size_t index = 0; index < last; ++index) {
auto& thread = waiting_threads[index];
@@ -1599,7 +1760,6 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
// liberate Cond Var Thread.
current_process->RemoveConditionVariableThread(thread);
- thread->SetCondVarWaitAddress(0);
const std::size_t current_core = system.CurrentCoreIndex();
auto& monitor = system.Monitor();
@@ -1610,10 +1770,8 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
u32 update_val = 0;
const VAddr mutex_address = thread->GetMutexWaitAddress();
do {
- monitor.SetExclusive(current_core, mutex_address);
-
// If the mutex is not yet acquired, acquire it.
- mutex_val = memory.Read32(mutex_address);
+ mutex_val = monitor.ExclusiveRead32(current_core, mutex_address);
if (mutex_val != 0) {
update_val = mutex_val | Mutex::MutexHasWaitersFlag;
@@ -1621,33 +1779,28 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
update_val = thread->GetWaitHandle();
}
} while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val));
+ monitor.ClearExclusive();
if (mutex_val == 0) {
// We were able to acquire the mutex, resume this thread.
- ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
- thread->ResumeFromWait();
-
auto* const lock_owner = thread->GetLockOwner();
if (lock_owner != nullptr) {
lock_owner->RemoveMutexWaiter(thread);
}
thread->SetLockOwner(nullptr);
- thread->SetMutexWaitAddress(0);
- thread->SetWaitHandle(0);
- thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
- system.PrepareReschedule(thread->GetProcessorID());
+ thread->SetSynchronizationResults(nullptr, RESULT_SUCCESS);
+ thread->ResumeFromWait();
} else {
// The mutex is already owned by some other thread, make this thread wait on it.
const Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
auto owner = handle_table.Get<Thread>(owner_handle);
ASSERT(owner);
- ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar);
- thread->InvalidateWakeupCallback();
- thread->SetStatus(ThreadStatus::WaitMutex);
+ if (thread->GetStatus() == ThreadStatus::WaitCondVar) {
+ thread->SetStatus(ThreadStatus::WaitMutex);
+ }
owner->AddMutexWaiter(thread);
- system.PrepareReschedule(thread->GetProcessorID());
}
}
}
@@ -1678,12 +1831,15 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter();
const ResultCode result =
address_arbiter.WaitForAddress(address, arbitration_type, value, timeout);
- if (result == RESULT_SUCCESS) {
- system.PrepareReschedule();
- }
return result;
}
+/// 32-bit wrapper for WaitForAddress: joins the two timeout words into one signed 64-bit
+/// value and converts the address to VAddr; type and value are forwarded unchanged.
+static ResultCode WaitForAddress32(Core::System& system, u32 address, u32 type, s32 value,
+                                   u32 timeout_low, u32 timeout_high) {
+    const u64 upper_bits = static_cast<u64>(timeout_high) << 32;
+    const u64 joined = upper_bits | static_cast<u64>(timeout_low);
+    const auto wide_address = static_cast<VAddr>(address);
+    return WaitForAddress(system, wide_address, type, value, static_cast<s64>(joined));
+}
+
// Signals to an address (via Address Arbiter)
static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
s32 num_to_wake) {
@@ -1707,6 +1863,11 @@ static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type,
return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake);
}
+/// 32-bit wrapper for SignalToAddress: converts the address to VAddr and forwards the
+/// remaining arguments unchanged.
+static ResultCode SignalToAddress32(Core::System& system, u32 address, u32 type, s32 value,
+                                    s32 num_to_wake) {
+    const auto wide_address = static_cast<VAddr>(address);
+    return SignalToAddress(system, wide_address, type, value, num_to_wake);
+}
+
static void KernelDebug([[maybe_unused]] Core::System& system,
[[maybe_unused]] u32 kernel_debug_type, [[maybe_unused]] u64 param1,
[[maybe_unused]] u64 param2, [[maybe_unused]] u64 param3) {
@@ -1725,14 +1886,21 @@ static u64 GetSystemTick(Core::System& system) {
auto& core_timing = system.CoreTiming();
// Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick)
- const u64 result{Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks())};
+ const u64 result{system.CoreTiming().GetClockTicks()};
- // Advance time to defeat dumb games that busy-wait for the frame to end.
- core_timing.AddTicks(400);
+ if (!system.Kernel().IsMulticore()) {
+ core_timing.AddTicks(400U);
+ }
return result;
}
+/// 32-bit wrapper for GetSystemTick: stores the 64-bit tick value as two 32-bit halves.
+static void GetSystemTick32(Core::System& system, u32* time_low, u32* time_high) {
+    const u64 ticks = GetSystemTick(system);
+    *time_high = static_cast<u32>(ticks >> 32);
+    *time_low = static_cast<u32>(ticks & 0xFFFFFFFFULL);
+}
+
/// Close a handle
static ResultCode CloseHandle(Core::System& system, Handle handle) {
LOG_TRACE(Kernel_SVC, "Closing handle 0x{:08X}", handle);
@@ -1765,9 +1933,14 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) {
return ERR_INVALID_HANDLE;
}
+/// 32-bit wrapper for ResetSignal; forwards the handle unchanged and returns the result of
+/// the 64-bit implementation.
+static ResultCode ResetSignal32(Core::System& system, Handle handle) {
+ return ResetSignal(system, handle);
+}
+
/// Creates a TransferMemory object
static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAddr addr, u64 size,
u32 permissions) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_DEBUG(Kernel_SVC, "called addr=0x{:X}, size=0x{:X}, perms=0x{:08X}", addr, size,
permissions);
@@ -1812,6 +1985,12 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for CreateTransferMemory: converts the address to VAddr and the size to
+/// u64; the output handle pointer and permissions are forwarded unchanged.
+static ResultCode CreateTransferMemory32(Core::System& system, Handle* handle, u32 addr, u32 size,
+                                         u32 permissions) {
+    // CreateTransferMemory declares its size parameter as u64, so widen to exactly that type
+    // instead of the host-dependent std::size_t.
+    return CreateTransferMemory(system, handle, static_cast<VAddr>(addr),
+                                static_cast<u64>(size), permissions);
+}
+
static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, u32* core,
u64* mask) {
LOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
@@ -1821,6 +2000,8 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
if (!thread) {
LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}",
thread_handle);
+ *core = 0;
+ *mask = 0;
return ERR_INVALID_HANDLE;
}
@@ -1830,6 +2011,15 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle,
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for GetThreadCoreMask: runs the 64-bit implementation and hands the mask
+/// back as two 32-bit halves. The core output pointer is passed straight through.
+static ResultCode GetThreadCoreMask32(Core::System& system, Handle thread_handle, u32* core,
+                                      u32* mask_low, u32* mask_high) {
+    u64 full_mask = 0;
+    const ResultCode status = GetThreadCoreMask(system, thread_handle, core, &full_mask);
+
+    // Both halves are written regardless of the result code.
+    *mask_low = static_cast<u32>(full_mask & 0xFFFFFFFFULL);
+    *mask_high = static_cast<u32>(full_mask >> 32);
+    return status;
+}
+
static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core,
u64 affinity_mask) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}",
@@ -1861,7 +2051,7 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_COMBINATION;
}
- if (core < Core::NUM_CPU_CORES) {
+ if (core < Core::Hardware::NUM_CPU_CORES) {
if ((affinity_mask & (1ULL << core)) == 0) {
LOG_ERROR(Kernel_SVC,
"Core is not enabled for the current mask, core={}, mask={:016X}", core,
@@ -1883,11 +2073,14 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle,
return ERR_INVALID_HANDLE;
}
- system.PrepareReschedule(thread->GetProcessorID());
- thread->ChangeCore(core, affinity_mask);
- system.PrepareReschedule(thread->GetProcessorID());
+ return thread->SetCoreAndAffinityMask(core, affinity_mask);
+}
- return RESULT_SUCCESS;
+/// 32-bit wrapper for SetThreadCoreMask: joins the two 32-bit mask words into one u64 and
+/// forwards the handle and core unchanged.
+static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle, u32 core,
+                                      u32 affinity_mask_low, u32 affinity_mask_high) {
+    u64 affinity_mask = affinity_mask_high;
+    affinity_mask = (affinity_mask << 32) | affinity_mask_low;
+    return SetThreadCoreMask(system, thread_handle, core, affinity_mask);
}
static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle* read_handle) {
@@ -1918,6 +2111,10 @@ static ResultCode CreateEvent(Core::System& system, Handle* write_handle, Handle
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for CreateEvent; forwards both output handle pointers unchanged and returns
+/// the result of the 64-bit implementation.
+static ResultCode CreateEvent32(Core::System& system, Handle* write_handle, Handle* read_handle) {
+ return CreateEvent(system, write_handle, read_handle);
+}
+
static ResultCode ClearEvent(Core::System& system, Handle handle) {
LOG_TRACE(Kernel_SVC, "called, event=0x{:08X}", handle);
@@ -1939,6 +2136,10 @@ static ResultCode ClearEvent(Core::System& system, Handle handle) {
return ERR_INVALID_HANDLE;
}
+/// 32-bit wrapper for ClearEvent; forwards the handle unchanged and returns the result of
+/// the 64-bit implementation.
+static ResultCode ClearEvent32(Core::System& system, Handle handle) {
+ return ClearEvent(system, handle);
+}
+
static ResultCode SignalEvent(Core::System& system, Handle handle) {
LOG_DEBUG(Kernel_SVC, "called. Handle=0x{:08X}", handle);
@@ -1951,10 +2152,13 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) {
}
writable_event->Signal();
- system.PrepareReschedule();
return RESULT_SUCCESS;
}
+/// 32-bit wrapper for SignalEvent; forwards the handle unchanged and returns the result of
+/// the 64-bit implementation.
+static ResultCode SignalEvent32(Core::System& system, Handle handle) {
+ return SignalEvent(system, handle);
+}
+
static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_handle, u32 type) {
LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, type=0x{:X}", process_handle, type);
@@ -1982,6 +2186,7 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_
}
static ResultCode CreateResourceLimit(Core::System& system, Handle* out_handle) {
+ std::lock_guard lock{HLE::g_hle_lock};
LOG_DEBUG(Kernel_SVC, "called");
auto& kernel = system.Kernel();
@@ -2139,6 +2344,15 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd
return RESULT_SUCCESS;
}
+/// 32-bit FlushProcessDataCache SVC. Performs no work beyond logging the call and always
+/// reports success; none of the arguments are inspected.
+static ResultCode FlushProcessDataCache32([[maybe_unused]] Core::System& system,
+                                          [[maybe_unused]] Handle handle,
+                                          [[maybe_unused]] u32 address,
+                                          [[maybe_unused]] u32 size) {
+    // Note(Blinkhawk): For emulation purposes of the data cache this is mostly a no-op,
+    // as all emulation is done in the same cache level in host architecture, thus data cache
+    // does not need flushing.
+    LOG_DEBUG(Kernel_SVC, "called");
+    return RESULT_SUCCESS;
+}
+
namespace {
struct FunctionDef {
using Func = void(Core::System&);
@@ -2153,57 +2367,57 @@ static const FunctionDef SVC_Table_32[] = {
{0x00, nullptr, "Unknown"},
{0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"},
{0x02, nullptr, "Unknown"},
- {0x03, nullptr, "SetMemoryAttribute32"},
- {0x04, nullptr, "MapMemory32"},
- {0x05, nullptr, "UnmapMemory32"},
+ {0x03, SvcWrap32<SetMemoryAttribute32>, "SetMemoryAttribute32"},
+ {0x04, SvcWrap32<MapMemory32>, "MapMemory32"},
+ {0x05, SvcWrap32<UnmapMemory32>, "UnmapMemory32"},
{0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"},
- {0x07, nullptr, "ExitProcess32"},
- {0x08, nullptr, "CreateThread32"},
- {0x09, nullptr, "StartThread32"},
- {0x0a, nullptr, "ExitThread32"},
- {0x0b, nullptr, "SleepThread32"},
+ {0x07, SvcWrap32<ExitProcess32>, "ExitProcess32"},
+ {0x08, SvcWrap32<CreateThread32>, "CreateThread32"},
+ {0x09, SvcWrap32<StartThread32>, "StartThread32"},
+ {0x0a, SvcWrap32<ExitThread32>, "ExitThread32"},
+ {0x0b, SvcWrap32<SleepThread32>, "SleepThread32"},
{0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"},
- {0x0d, nullptr, "SetThreadPriority32"},
- {0x0e, nullptr, "GetThreadCoreMask32"},
- {0x0f, nullptr, "SetThreadCoreMask32"},
- {0x10, nullptr, "GetCurrentProcessorNumber32"},
- {0x11, nullptr, "SignalEvent32"},
- {0x12, nullptr, "ClearEvent32"},
- {0x13, nullptr, "MapSharedMemory32"},
+ {0x0d, SvcWrap32<SetThreadPriority32>, "SetThreadPriority32"},
+ {0x0e, SvcWrap32<GetThreadCoreMask32>, "GetThreadCoreMask32"},
+ {0x0f, SvcWrap32<SetThreadCoreMask32>, "SetThreadCoreMask32"},
+ {0x10, SvcWrap32<GetCurrentProcessorNumber32>, "GetCurrentProcessorNumber32"},
+ {0x11, SvcWrap32<SignalEvent32>, "SignalEvent32"},
+ {0x12, SvcWrap32<ClearEvent32>, "ClearEvent32"},
+ {0x13, SvcWrap32<MapSharedMemory32>, "MapSharedMemory32"},
{0x14, nullptr, "UnmapSharedMemory32"},
- {0x15, nullptr, "CreateTransferMemory32"},
+ {0x15, SvcWrap32<CreateTransferMemory32>, "CreateTransferMemory32"},
{0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"},
- {0x17, nullptr, "ResetSignal32"},
+ {0x17, SvcWrap32<ResetSignal32>, "ResetSignal32"},
{0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"},
- {0x19, nullptr, "CancelSynchronization32"},
- {0x1a, nullptr, "ArbitrateLock32"},
- {0x1b, nullptr, "ArbitrateUnlock32"},
- {0x1c, nullptr, "WaitProcessWideKeyAtomic32"},
+ {0x19, SvcWrap32<CancelSynchronization32>, "CancelSynchronization32"},
+ {0x1a, SvcWrap32<ArbitrateLock32>, "ArbitrateLock32"},
+ {0x1b, SvcWrap32<ArbitrateUnlock32>, "ArbitrateUnlock32"},
+ {0x1c, SvcWrap32<WaitProcessWideKeyAtomic32>, "WaitProcessWideKeyAtomic32"},
{0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"},
- {0x1e, nullptr, "GetSystemTick32"},
+ {0x1e, SvcWrap32<GetSystemTick32>, "GetSystemTick32"},
{0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"},
{0x20, nullptr, "Unknown"},
{0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"},
{0x22, nullptr, "SendSyncRequestWithUserBuffer32"},
{0x23, nullptr, "Unknown"},
- {0x24, nullptr, "GetProcessId32"},
+ {0x24, SvcWrap32<GetProcessId32>, "GetProcessId32"},
{0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"},
- {0x26, nullptr, "Break32"},
+ {0x26, SvcWrap32<Break32>, "Break32"},
{0x27, nullptr, "OutputDebugString32"},
{0x28, nullptr, "Unknown"},
{0x29, SvcWrap32<GetInfo32>, "GetInfo32"},
{0x2a, nullptr, "Unknown"},
{0x2b, nullptr, "Unknown"},
- {0x2c, nullptr, "MapPhysicalMemory32"},
- {0x2d, nullptr, "UnmapPhysicalMemory32"},
+ {0x2c, SvcWrap32<MapPhysicalMemory32>, "MapPhysicalMemory32"},
+ {0x2d, SvcWrap32<UnmapPhysicalMemory32>, "UnmapPhysicalMemory32"},
{0x2e, nullptr, "Unknown"},
{0x2f, nullptr, "Unknown"},
{0x30, nullptr, "Unknown"},
{0x31, nullptr, "Unknown"},
- {0x32, nullptr, "SetThreadActivity32"},
- {0x33, nullptr, "GetThreadContext32"},
- {0x34, nullptr, "WaitForAddress32"},
- {0x35, nullptr, "SignalToAddress32"},
+ {0x32, SvcWrap32<SetThreadActivity32>, "SetThreadActivity32"},
+ {0x33, SvcWrap32<GetThreadContext32>, "GetThreadContext32"},
+ {0x34, SvcWrap32<WaitForAddress32>, "WaitForAddress32"},
+ {0x35, SvcWrap32<SignalToAddress32>, "SignalToAddress32"},
{0x36, nullptr, "Unknown"},
{0x37, nullptr, "Unknown"},
{0x38, nullptr, "Unknown"},
@@ -2219,7 +2433,7 @@ static const FunctionDef SVC_Table_32[] = {
{0x42, nullptr, "Unknown"},
{0x43, nullptr, "ReplyAndReceive32"},
{0x44, nullptr, "Unknown"},
- {0x45, nullptr, "CreateEvent32"},
+ {0x45, SvcWrap32<CreateEvent32>, "CreateEvent32"},
{0x46, nullptr, "Unknown"},
{0x47, nullptr, "Unknown"},
{0x48, nullptr, "Unknown"},
@@ -2245,7 +2459,7 @@ static const FunctionDef SVC_Table_32[] = {
{0x5c, nullptr, "Unknown"},
{0x5d, nullptr, "Unknown"},
{0x5e, nullptr, "Unknown"},
- {0x5F, nullptr, "FlushProcessDataCache32"},
+ {0x5F, SvcWrap32<FlushProcessDataCache32>, "FlushProcessDataCache32"},
{0x60, nullptr, "Unknown"},
{0x61, nullptr, "Unknown"},
{0x62, nullptr, "Unknown"},
@@ -2423,13 +2637,10 @@ static const FunctionDef* GetSVCInfo64(u32 func_num) {
return &SVC_Table_64[func_num];
}
-MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
-
void Call(Core::System& system, u32 immediate) {
- MICROPROFILE_SCOPE(Kernel_SVC);
-
- // Lock the global kernel mutex when we enter the kernel HLE.
- std::lock_guard lock{HLE::g_hle_lock};
+ system.ExitDynarmicProfile();
+ auto& kernel = system.Kernel();
+ kernel.EnterSVCProfile();
const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
: GetSVCInfo32(immediate);
@@ -2442,6 +2653,9 @@ void Call(Core::System& system, u32 immediate) {
} else {
LOG_CRITICAL(Kernel_SVC, "Unknown SVC function 0x{:X}", immediate);
}
+
+ kernel.ExitSVCProfile();
+ system.EnterDynarmicProfile();
}
} // namespace Kernel::Svc
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 7d735e3fa..0b6dd9df0 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -350,13 +350,50 @@ void SvcWrap64(Core::System& system) {
func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
}
-// Used by QueryMemory32
+// Used by QueryMemory32, ArbitrateLock32
template <ResultCode func(Core::System&, u32, u32, u32)>
void SvcWrap32(Core::System& system) {
FuncReturn32(system,
func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw);
}
+// Used by Break32
+template <void func(Core::System&, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2));
+}
+
+// Used by ExitProcess32, ExitThread32
+template <void func(Core::System&)>
+void SvcWrap32(Core::System& system) {
+ func(system);
+}
+
+// Used by GetCurrentProcessorNumber32
+template <u32 func(Core::System&)>
+void SvcWrap32(Core::System& system) {
+ FuncReturn32(system, func(system));
+}
+
+// Used by SleepThread32
+template <void func(Core::System&, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ func(system, Param32(system, 0), Param32(system, 1));
+}
+
+// Used by CreateThread32
+template <ResultCode func(Core::System&, Handle*, u32, u32, u32, u32, s32)>
+void SvcWrap32(Core::System& system) {
+ Handle param_1 = 0;
+
+ const u32 retval = func(system, &param_1, Param32(system, 0), Param32(system, 1),
+ Param32(system, 2), Param32(system, 3), Param32(system, 4))
+ .raw;
+
+ system.CurrentArmInterface().SetReg(1, param_1);
+ FuncReturn(system, retval);
+}
+
// Used by GetInfo32
template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)>
void SvcWrap32(Core::System& system) {
@@ -393,18 +430,114 @@ void SvcWrap32(Core::System& system) {
FuncReturn(system, retval);
}
+// Used by GetSystemTick32
+template <void func(Core::System&, u32*, u32*)>
+void SvcWrap32(Core::System& system) {
+ u32 param_1 = 0;
+ u32 param_2 = 0;
+
+ func(system, &param_1, &param_2);
+ system.CurrentArmInterface().SetReg(0, param_1);
+ system.CurrentArmInterface().SetReg(1, param_2);
+}
+
+// Used by CreateEvent32
+template <ResultCode func(Core::System&, Handle*, Handle*)>
+void SvcWrap32(Core::System& system) {
+ Handle param_1 = 0;
+ Handle param_2 = 0;
+
+ const u32 retval = func(system, &param_1, &param_2).raw;
+ system.CurrentArmInterface().SetReg(1, param_1);
+ system.CurrentArmInterface().SetReg(2, param_2);
+ FuncReturn(system, retval);
+}
+
+// Used by GetThreadId32
+template <ResultCode func(Core::System&, Handle, u32*, u32*, u32*)>
+void SvcWrap32(Core::System& system) {
+ u32 param_1 = 0;
+ u32 param_2 = 0;
+ u32 param_3 = 0;
+
+ const u32 retval = func(system, Param32(system, 2), &param_1, &param_2, &param_3).raw;
+ system.CurrentArmInterface().SetReg(1, param_1);
+ system.CurrentArmInterface().SetReg(2, param_2);
+ system.CurrentArmInterface().SetReg(3, param_3);
+ FuncReturn(system, retval);
+}
+
// Used by SignalProcessWideKey32
template <void func(Core::System&, u32, s32)>
void SvcWrap32(Core::System& system) {
func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1)));
}
-// Used by SendSyncRequest32
+// Used by SetThreadPriority32
+template <ResultCode func(Core::System&, Handle, u32)>
+void SvcWrap32(Core::System& system) {
+ const u32 retval =
+ func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw;
+ FuncReturn(system, retval);
+}
+
+// Used by SetThreadCoreMask32
+template <ResultCode func(Core::System&, Handle, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ const u32 retval =
+ func(system, static_cast<Handle>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+ static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
+ .raw;
+ FuncReturn(system, retval);
+}
+
+// Used by WaitProcessWideKeyAtomic32
+template <ResultCode func(Core::System&, u32, u32, Handle, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ const u32 retval =
+ func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+ static_cast<Handle>(Param(system, 2)), static_cast<u32>(Param(system, 3)),
+ static_cast<u32>(Param(system, 4)))
+ .raw;
+ FuncReturn(system, retval);
+}
+
+// Used by WaitForAddress32
+template <ResultCode func(Core::System&, u32, u32, s32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ const u32 retval = func(system, static_cast<u32>(Param(system, 0)),
+ static_cast<u32>(Param(system, 1)), static_cast<s32>(Param(system, 2)),
+ static_cast<u32>(Param(system, 3)), static_cast<u32>(Param(system, 4)))
+ .raw;
+ FuncReturn(system, retval);
+}
+
+// Used by SignalToAddress32
+template <ResultCode func(Core::System&, u32, u32, s32, s32)>
+void SvcWrap32(Core::System& system) {
+ const u32 retval =
+ func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1)),
+ static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
+ .raw;
+ FuncReturn(system, retval);
+}
+
+// Used by SendSyncRequest32, ArbitrateUnlock32
template <ResultCode func(Core::System&, u32)>
void SvcWrap32(Core::System& system) {
FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
}
+// Used by CreateTransferMemory32
+template <ResultCode func(Core::System&, Handle*, u32, u32, u32)>
+void SvcWrap32(Core::System& system) {
+ Handle handle = 0;
+ const u32 retval =
+ func(system, &handle, Param32(system, 1), Param32(system, 2), Param32(system, 3)).raw;
+ system.CurrentArmInterface().SetReg(1, handle);
+ FuncReturn(system, retval);
+}
+
// Used by WaitSynchronization32
template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
void SvcWrap32(Core::System& system) {
diff --git a/src/core/hle/kernel/synchronization.cpp b/src/core/hle/kernel/synchronization.cpp
index dc37fad1a..851b702a5 100644
--- a/src/core/hle/kernel/synchronization.cpp
+++ b/src/core/hle/kernel/synchronization.cpp
@@ -10,78 +10,107 @@
#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
namespace Kernel {
-/// Default thread wakeup callback for WaitSynchronization
-static bool DefaultThreadWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<SynchronizationObject> object,
- std::size_t index) {
- ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
-
- if (reason == ThreadWakeupReason::Timeout) {
- thread->SetWaitSynchronizationResult(RESULT_TIMEOUT);
- return true;
- }
-
- ASSERT(reason == ThreadWakeupReason::Signal);
- thread->SetWaitSynchronizationResult(RESULT_SUCCESS);
- thread->SetWaitSynchronizationOutput(static_cast<u32>(index));
- return true;
-}
-
Synchronization::Synchronization(Core::System& system) : system{system} {}
void Synchronization::SignalObject(SynchronizationObject& obj) const {
+ auto& kernel = system.Kernel();
+ SchedulerLock lock(kernel);
+ auto& time_manager = kernel.TimeManager();
if (obj.IsSignaled()) {
- obj.WakeupAllWaitingThreads();
+ for (auto thread : obj.GetWaitingThreads()) {
+ if (thread->GetSchedulingStatus() == ThreadSchedStatus::Paused) {
+ if (thread->GetStatus() != ThreadStatus::WaitHLEEvent) {
+ ASSERT(thread->GetStatus() == ThreadStatus::WaitSynch);
+ ASSERT(thread->IsWaitingSync());
+ }
+ thread->SetSynchronizationResults(&obj, RESULT_SUCCESS);
+ thread->ResumeFromWait();
+ }
+ }
+ obj.ClearWaitingThreads();
}
}
std::pair<ResultCode, Handle> Synchronization::WaitFor(
std::vector<std::shared_ptr<SynchronizationObject>>& sync_objects, s64 nano_seconds) {
+ auto& kernel = system.Kernel();
auto* const thread = system.CurrentScheduler().GetCurrentThread();
- // Find the first object that is acquirable in the provided list of objects
- const auto itr = std::find_if(sync_objects.begin(), sync_objects.end(),
- [thread](const std::shared_ptr<SynchronizationObject>& object) {
- return object->IsSignaled();
- });
-
- if (itr != sync_objects.end()) {
- // We found a ready object, acquire it and set the result value
- SynchronizationObject* object = itr->get();
- object->Acquire(thread);
- const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
- return {RESULT_SUCCESS, index};
+ Handle event_handle = InvalidHandle;
+ {
+ SchedulerLockAndSleep lock(kernel, event_handle, thread, nano_seconds);
+ const auto itr =
+ std::find_if(sync_objects.begin(), sync_objects.end(),
+ [thread](const std::shared_ptr<SynchronizationObject>& object) {
+ return object->IsSignaled();
+ });
+
+ if (itr != sync_objects.end()) {
+ // We found a ready object, acquire it and set the result value
+ SynchronizationObject* object = itr->get();
+ object->Acquire(thread);
+ const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+ lock.CancelSleep();
+ return {RESULT_SUCCESS, index};
+ }
+
+ if (nano_seconds == 0) {
+ lock.CancelSleep();
+ return {RESULT_TIMEOUT, InvalidHandle};
+ }
+
+ if (thread->IsPendingTermination()) {
+ lock.CancelSleep();
+ return {ERR_THREAD_TERMINATING, InvalidHandle};
+ }
+
+ if (thread->IsSyncCancelled()) {
+ thread->SetSyncCancelled(false);
+ lock.CancelSleep();
+ return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+ }
+
+ for (auto& object : sync_objects) {
+ object->AddWaitingThread(SharedFrom(thread));
+ }
+
+ thread->SetSynchronizationObjects(&sync_objects);
+ thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
+ thread->SetStatus(ThreadStatus::WaitSynch);
+ thread->SetWaitingSync(true);
}
+ thread->SetWaitingSync(false);
- // No objects were ready to be acquired, prepare to suspend the thread.
-
- // If a timeout value of 0 was provided, just return the Timeout error code instead of
- // suspending the thread.
- if (nano_seconds == 0) {
- return {RESULT_TIMEOUT, InvalidHandle};
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = kernel.TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
}
- if (thread->IsSyncCancelled()) {
- thread->SetSyncCancelled(false);
- return {ERR_SYNCHRONIZATION_CANCELED, InvalidHandle};
+ {
+ SchedulerLock lock(kernel);
+ ResultCode signaling_result = thread->GetSignalingResult();
+ SynchronizationObject* signaling_object = thread->GetSignalingObject();
+ thread->SetSynchronizationObjects(nullptr);
+ auto shared_thread = SharedFrom(thread);
+ for (auto& obj : sync_objects) {
+ obj->RemoveWaitingThread(shared_thread);
+ }
+ if (signaling_object != nullptr) {
+ const auto itr = std::find_if(
+ sync_objects.begin(), sync_objects.end(),
+ [signaling_object](const std::shared_ptr<SynchronizationObject>& object) {
+ return object.get() == signaling_object;
+ });
+ ASSERT(itr != sync_objects.end());
+ signaling_object->Acquire(thread);
+ const u32 index = static_cast<s32>(std::distance(sync_objects.begin(), itr));
+ return {signaling_result, index};
+ }
+ return {signaling_result, -1};
}
-
- for (auto& object : sync_objects) {
- object->AddWaitingThread(SharedFrom(thread));
- }
-
- thread->SetSynchronizationObjects(std::move(sync_objects));
- thread->SetStatus(ThreadStatus::WaitSynch);
-
- // Create an event to wake the thread up after the specified nanosecond delay has passed
- thread->WakeAfterDelay(nano_seconds);
- thread->SetWakeupCallback(DefaultThreadWakeupCallback);
-
- system.PrepareReschedule(thread->GetProcessorID());
-
- return {RESULT_TIMEOUT, InvalidHandle};
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/synchronization_object.cpp b/src/core/hle/kernel/synchronization_object.cpp
index 43f3eef18..ba4d39157 100644
--- a/src/core/hle/kernel/synchronization_object.cpp
+++ b/src/core/hle/kernel/synchronization_object.cpp
@@ -38,68 +38,8 @@ void SynchronizationObject::RemoveWaitingThread(std::shared_ptr<Thread> thread)
waiting_threads.erase(itr);
}
-std::shared_ptr<Thread> SynchronizationObject::GetHighestPriorityReadyThread() const {
- Thread* candidate = nullptr;
- u32 candidate_priority = THREADPRIO_LOWEST + 1;
-
- for (const auto& thread : waiting_threads) {
- const ThreadStatus thread_status = thread->GetStatus();
-
- // The list of waiting threads must not contain threads that are not waiting to be awakened.
- ASSERT_MSG(thread_status == ThreadStatus::WaitSynch ||
- thread_status == ThreadStatus::WaitHLEEvent,
- "Inconsistent thread statuses in waiting_threads");
-
- if (thread->GetPriority() >= candidate_priority)
- continue;
-
- if (ShouldWait(thread.get()))
- continue;
-
- candidate = thread.get();
- candidate_priority = thread->GetPriority();
- }
-
- return SharedFrom(candidate);
-}
-
-void SynchronizationObject::WakeupWaitingThread(std::shared_ptr<Thread> thread) {
- ASSERT(!ShouldWait(thread.get()));
-
- if (!thread) {
- return;
- }
-
- if (thread->IsSleepingOnWait()) {
- for (const auto& object : thread->GetSynchronizationObjects()) {
- ASSERT(!object->ShouldWait(thread.get()));
- object->Acquire(thread.get());
- }
- } else {
- Acquire(thread.get());
- }
-
- const std::size_t index = thread->GetSynchronizationObjectIndex(SharedFrom(this));
-
- thread->ClearSynchronizationObjects();
-
- thread->CancelWakeupTimer();
-
- bool resume = true;
- if (thread->HasWakeupCallback()) {
- resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Signal, thread, SharedFrom(this),
- index);
- }
- if (resume) {
- thread->ResumeFromWait();
- kernel.PrepareReschedule(thread->GetProcessorID());
- }
-}
-
-void SynchronizationObject::WakeupAllWaitingThreads() {
- while (auto thread = GetHighestPriorityReadyThread()) {
- WakeupWaitingThread(thread);
- }
+void SynchronizationObject::ClearWaitingThreads() {
+ waiting_threads.clear();
}
const std::vector<std::shared_ptr<Thread>>& SynchronizationObject::GetWaitingThreads() const {
diff --git a/src/core/hle/kernel/synchronization_object.h b/src/core/hle/kernel/synchronization_object.h
index 741c31faf..f89b24204 100644
--- a/src/core/hle/kernel/synchronization_object.h
+++ b/src/core/hle/kernel/synchronization_object.h
@@ -12,6 +12,7 @@
namespace Kernel {
class KernelCore;
+class Synchronization;
class Thread;
/// Class that represents a Kernel object that a thread can be waiting on
@@ -49,24 +50,11 @@ public:
*/
void RemoveWaitingThread(std::shared_ptr<Thread> thread);
- /**
- * Wake up all threads waiting on this object that can be awoken, in priority order,
- * and set the synchronization result and output of the thread.
- */
- void WakeupAllWaitingThreads();
-
- /**
- * Wakes up a single thread waiting on this object.
- * @param thread Thread that is waiting on this object to wakeup.
- */
- void WakeupWaitingThread(std::shared_ptr<Thread> thread);
-
- /// Obtains the highest priority thread that is ready to run from this object's waiting list.
- std::shared_ptr<Thread> GetHighestPriorityReadyThread() const;
-
/// Get a const reference to the waiting threads list for debug use
const std::vector<std::shared_ptr<Thread>>& GetWaitingThreads() const;
+ void ClearWaitingThreads();
+
protected:
bool is_signaled{}; // Tells if this sync object is signalled;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index db7f379ac..2b1092697 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -9,12 +9,21 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/fiber.h"
#include "common/logging/log.h"
#include "common/thread_queue_list.h"
#include "core/arm/arm_interface.h"
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic_32.h"
+#include "core/arm/dynarmic/arm_dynarmic_64.h"
+#endif
+#include "core/arm/cpu_interrupt_handler.h"
+#include "core/arm/exclusive_monitor.h"
+#include "core/arm/unicorn/arm_unicorn.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/cpu_manager.h"
#include "core/hardware_properties.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
@@ -23,6 +32,7 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
#include "core/hle/result.h"
#include "core/memory.h"
@@ -44,46 +54,26 @@ Thread::Thread(KernelCore& kernel) : SynchronizationObject{kernel} {}
Thread::~Thread() = default;
void Thread::Stop() {
- // Cancel any outstanding wakeup events for this thread
- Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
- global_handle);
- kernel.GlobalHandleTable().Close(global_handle);
- global_handle = 0;
- SetStatus(ThreadStatus::Dead);
- Signal();
-
- // Clean up any dangling references in objects that this thread was waiting for
- for (auto& wait_object : wait_objects) {
- wait_object->RemoveWaitingThread(SharedFrom(this));
- }
- wait_objects.clear();
-
- owner_process->UnregisterThread(this);
-
- // Mark the TLS slot in the thread's page as free.
- owner_process->FreeTLSRegion(tls_address);
-}
-
-void Thread::WakeAfterDelay(s64 nanoseconds) {
- // Don't schedule a wakeup if the thread wants to wait forever
- if (nanoseconds == -1)
- return;
+ {
+ SchedulerLock lock(kernel);
+ SetStatus(ThreadStatus::Dead);
+ Signal();
+ kernel.GlobalHandleTable().Close(global_handle);
- // This function might be called from any thread so we have to be cautious and use the
- // thread-safe version of ScheduleEvent.
- const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
- Core::System::GetInstance().CoreTiming().ScheduleEvent(
- cycles, kernel.ThreadWakeupCallbackEventType(), global_handle);
-}
+ if (owner_process) {
+ owner_process->UnregisterThread(this);
-void Thread::CancelWakeupTimer() {
- Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
- global_handle);
+ // Mark the TLS slot in the thread's page as free.
+ owner_process->FreeTLSRegion(tls_address);
+ }
+ arm_interface.reset();
+ has_exited = true;
+ }
+ global_handle = 0;
}
void Thread::ResumeFromWait() {
- ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
-
+ SchedulerLock lock(kernel);
switch (status) {
case ThreadStatus::Paused:
case ThreadStatus::WaitSynch:
@@ -99,7 +89,7 @@ void Thread::ResumeFromWait() {
case ThreadStatus::Ready:
// The thread's wakeup callback must have already been cleared when the thread was first
// awoken.
- ASSERT(wakeup_callback == nullptr);
+ ASSERT(hle_callback == nullptr);
// If the thread is waiting on multiple wait objects, it might be awoken more than once
// before actually resuming. We can ignore subsequent wakeups if the thread status has
// already been set to ThreadStatus::Ready.
@@ -115,24 +105,31 @@ void Thread::ResumeFromWait() {
return;
}
- wakeup_callback = nullptr;
+ SetStatus(ThreadStatus::Ready);
+}
+
+void Thread::OnWakeUp() {
+ SchedulerLock lock(kernel);
- if (activity == ThreadActivity::Paused) {
- SetStatus(ThreadStatus::Paused);
- return;
- }
+ SetStatus(ThreadStatus::Ready);
+}
+ResultCode Thread::Start() {
+ SchedulerLock lock(kernel);
SetStatus(ThreadStatus::Ready);
+ return RESULT_SUCCESS;
}
void Thread::CancelWait() {
- if (GetSchedulingStatus() != ThreadSchedStatus::Paused) {
+ SchedulerLock lock(kernel);
+ if (GetSchedulingStatus() != ThreadSchedStatus::Paused || !is_waiting_on_sync) {
is_sync_cancelled = true;
return;
}
+ // TODO(Blinkhawk): Implement cancel of server session
is_sync_cancelled = false;
- SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED);
- ResumeFromWait();
+ SetSynchronizationResults(nullptr, ERR_SYNCHRONIZATION_CANCELED);
+ SetStatus(ThreadStatus::Ready);
}
static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
@@ -153,12 +150,29 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
context.fpcr = 0;
}
-ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::string name,
- VAddr entry_point, u32 priority, u64 arg,
- s32 processor_id, VAddr stack_top,
- Process& owner_process) {
+std::shared_ptr<Common::Fiber>& Thread::GetHostContext() {
+ return host_context;
+}
+
+ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
+ std::string name, VAddr entry_point, u32 priority,
+ u64 arg, s32 processor_id, VAddr stack_top,
+ Process* owner_process) {
+ std::function<void(void*)> init_func = system.GetCpuManager().GetGuestThreadStartFunc();
+ void* init_func_parameter = system.GetCpuManager().GetStartFuncParamater();
+ return Create(system, type_flags, name, entry_point, priority, arg, processor_id, stack_top,
+ owner_process, std::move(init_func), init_func_parameter);
+}
+
+ResultVal<std::shared_ptr<Thread>> Thread::Create(Core::System& system, ThreadType type_flags,
+ std::string name, VAddr entry_point, u32 priority,
+ u64 arg, s32 processor_id, VAddr stack_top,
+ Process* owner_process,
+ std::function<void(void*)>&& thread_start_func,
+ void* thread_start_parameter) {
+ auto& kernel = system.Kernel();
// Check if priority is in ranged. Lowest priority -> highest priority id.
- if (priority > THREADPRIO_LOWEST) {
+ if (priority > THREADPRIO_LOWEST && ((type_flags & THREADTYPE_IDLE) == 0)) {
LOG_ERROR(Kernel_SVC, "Invalid thread priority: {}", priority);
return ERR_INVALID_THREAD_PRIORITY;
}
@@ -168,11 +182,12 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
return ERR_INVALID_PROCESSOR_ID;
}
- auto& system = Core::System::GetInstance();
- if (!system.Memory().IsValidVirtualAddress(owner_process, entry_point)) {
- LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
- // TODO (bunnei): Find the correct error code to use here
- return RESULT_UNKNOWN;
+ if (owner_process) {
+ if (!system.Memory().IsValidVirtualAddress(*owner_process, entry_point)) {
+ LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point);
+ // TODO (bunnei): Find the correct error code to use here
+ return RESULT_UNKNOWN;
+ }
}
std::shared_ptr<Thread> thread = std::make_shared<Thread>(kernel);
@@ -183,51 +198,82 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
thread->stack_top = stack_top;
thread->tpidr_el0 = 0;
thread->nominal_priority = thread->current_priority = priority;
- thread->last_running_ticks = system.CoreTiming().GetTicks();
+ thread->last_running_ticks = 0;
thread->processor_id = processor_id;
thread->ideal_core = processor_id;
thread->affinity_mask = 1ULL << processor_id;
- thread->wait_objects.clear();
+ thread->wait_objects = nullptr;
thread->mutex_wait_address = 0;
thread->condvar_wait_address = 0;
thread->wait_handle = 0;
thread->name = std::move(name);
thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
- thread->owner_process = &owner_process;
- auto& scheduler = kernel.GlobalScheduler();
- scheduler.AddThread(thread);
- thread->tls_address = thread->owner_process->CreateTLSRegion();
-
- thread->owner_process->RegisterThread(thread.get());
+ thread->owner_process = owner_process;
+ thread->type = type_flags;
+ if ((type_flags & THREADTYPE_IDLE) == 0) {
+ auto& scheduler = kernel.GlobalScheduler();
+ scheduler.AddThread(thread);
+ }
+ if (owner_process) {
+ thread->tls_address = thread->owner_process->CreateTLSRegion();
+ thread->owner_process->RegisterThread(thread.get());
+ } else {
+ thread->tls_address = 0;
+ }
+ // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
+ // to initialize the context
+ thread->arm_interface.reset();
+ if ((type_flags & THREADTYPE_HLE) == 0) {
+#ifdef ARCHITECTURE_x86_64
+ if (owner_process && !owner_process->Is64BitProcess()) {
+ thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_32>(
+ system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+ processor_id);
+ } else {
+ thread->arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
+ system, kernel.Interrupts(), kernel.IsMulticore(), kernel.GetExclusiveMonitor(),
+ processor_id);
+ }
- ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
- static_cast<u32>(entry_point), static_cast<u32>(arg));
- ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
+#else
+ if (owner_process && !owner_process->Is64BitProcess()) {
+ thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
+ system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch32,
+ processor_id);
+ } else {
+ thread->arm_interface = std::make_shared<Core::ARM_Unicorn>(
+ system, kernel.Interrupts(), kernel.IsMulticore(), ARM_Unicorn::Arch::AArch64,
+ processor_id);
+ }
+ LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif
+ ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
+ static_cast<u32>(entry_point), static_cast<u32>(arg));
+ ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
+ }
+ thread->host_context =
+ std::make_shared<Common::Fiber>(std::move(thread_start_func), thread_start_parameter);
return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
}
void Thread::SetPriority(u32 priority) {
+ SchedulerLock lock(kernel);
ASSERT_MSG(priority <= THREADPRIO_LOWEST && priority >= THREADPRIO_HIGHEST,
"Invalid priority value.");
nominal_priority = priority;
UpdatePriority();
}
-void Thread::SetWaitSynchronizationResult(ResultCode result) {
- context_32.cpu_registers[0] = result.raw;
- context_64.cpu_registers[0] = result.raw;
-}
-
-void Thread::SetWaitSynchronizationOutput(s32 output) {
- context_32.cpu_registers[1] = output;
- context_64.cpu_registers[1] = output;
+void Thread::SetSynchronizationResults(SynchronizationObject* object, ResultCode result) {
+ signaling_object = object;
+ signaling_result = result;
}
s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
- ASSERT_MSG(!wait_objects.empty(), "Thread is not waiting for anything");
- const auto match = std::find(wait_objects.rbegin(), wait_objects.rend(), object);
- return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1);
+ ASSERT_MSG(!wait_objects->empty(), "Thread is not waiting for anything");
+ const auto match = std::find(wait_objects->rbegin(), wait_objects->rend(), object);
+ return static_cast<s32>(std::distance(match, wait_objects->rend()) - 1);
}
VAddr Thread::GetCommandBufferAddress() const {
@@ -236,6 +282,14 @@ VAddr Thread::GetCommandBufferAddress() const {
return GetTLSAddress() + command_header_offset;
}
+Core::ARM_Interface& Thread::ArmInterface() {
+ return *arm_interface;
+}
+
+const Core::ARM_Interface& Thread::ArmInterface() const {
+ return *arm_interface;
+}
+
void Thread::SetStatus(ThreadStatus new_status) {
if (new_status == status) {
return;
@@ -257,10 +311,6 @@ void Thread::SetStatus(ThreadStatus new_status) {
break;
}
- if (status == ThreadStatus::Running) {
- last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks();
- }
-
status = new_status;
}
@@ -341,75 +391,116 @@ void Thread::UpdatePriority() {
lock_owner->UpdatePriority();
}
-void Thread::ChangeCore(u32 core, u64 mask) {
- SetCoreAndAffinityMask(core, mask);
-}
-
bool Thread::AllSynchronizationObjectsReady() const {
- return std::none_of(wait_objects.begin(), wait_objects.end(),
+ return std::none_of(wait_objects->begin(), wait_objects->end(),
[this](const std::shared_ptr<SynchronizationObject>& object) {
return object->ShouldWait(this);
});
}
-bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<SynchronizationObject> object,
- std::size_t index) {
- ASSERT(wakeup_callback);
- return wakeup_callback(reason, std::move(thread), std::move(object), index);
+bool Thread::InvokeHLECallback(std::shared_ptr<Thread> thread) {
+ ASSERT(hle_callback);
+ return hle_callback(std::move(thread));
}
-void Thread::SetActivity(ThreadActivity value) {
- activity = value;
+ResultCode Thread::SetActivity(ThreadActivity value) {
+ SchedulerLock lock(kernel);
+
+ auto sched_status = GetSchedulingStatus();
+
+ if (sched_status != ThreadSchedStatus::Runnable && sched_status != ThreadSchedStatus::Paused) {
+ return ERR_INVALID_STATE;
+ }
+
+ if (IsPendingTermination()) {
+ return RESULT_SUCCESS;
+ }
if (value == ThreadActivity::Paused) {
- // Set status if not waiting
- if (status == ThreadStatus::Ready || status == ThreadStatus::Running) {
- SetStatus(ThreadStatus::Paused);
- kernel.PrepareReschedule(processor_id);
+ if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) != 0) {
+ return ERR_INVALID_STATE;
+ }
+ AddSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
+ } else {
+ if ((pausing_state & static_cast<u32>(ThreadSchedFlags::ThreadPauseFlag)) == 0) {
+ return ERR_INVALID_STATE;
}
- } else if (status == ThreadStatus::Paused) {
- // Ready to reschedule
- ResumeFromWait();
+ RemoveSchedulingFlag(ThreadSchedFlags::ThreadPauseFlag);
}
+ return RESULT_SUCCESS;
}
-void Thread::Sleep(s64 nanoseconds) {
- // Sleep current thread and check for next thread to schedule
- SetStatus(ThreadStatus::WaitSleep);
+ResultCode Thread::Sleep(s64 nanoseconds) {
+ Handle event_handle{};
+ {
+ SchedulerLockAndSleep lock(kernel, event_handle, this, nanoseconds);
+ SetStatus(ThreadStatus::WaitSleep);
+ }
- // Create an event to wake the thread up after the specified nanosecond delay has passed
- WakeAfterDelay(nanoseconds);
+ if (event_handle != InvalidHandle) {
+ auto& time_manager = kernel.TimeManager();
+ time_manager.UnscheduleTimeEvent(event_handle);
+ }
+ return RESULT_SUCCESS;
+}
+
+std::pair<ResultCode, bool> Thread::YieldSimple() {
+ bool is_redundant = false;
+ {
+ SchedulerLock lock(kernel);
+ is_redundant = kernel.GlobalScheduler().YieldThread(this);
+ }
+ return {RESULT_SUCCESS, is_redundant};
+}
+
+std::pair<ResultCode, bool> Thread::YieldAndBalanceLoad() {
+ bool is_redundant = false;
+ {
+ SchedulerLock lock(kernel);
+ is_redundant = kernel.GlobalScheduler().YieldThreadAndBalanceLoad(this);
+ }
+ return {RESULT_SUCCESS, is_redundant};
}
-bool Thread::YieldSimple() {
- auto& scheduler = kernel.GlobalScheduler();
- return scheduler.YieldThread(this);
+std::pair<ResultCode, bool> Thread::YieldAndWaitForLoadBalancing() {
+ bool is_redundant = false;
+ {
+ SchedulerLock lock(kernel);
+ is_redundant = kernel.GlobalScheduler().YieldThreadAndWaitForLoadBalancing(this);
+ }
+ return {RESULT_SUCCESS, is_redundant};
}
-bool Thread::YieldAndBalanceLoad() {
- auto& scheduler = kernel.GlobalScheduler();
- return scheduler.YieldThreadAndBalanceLoad(this);
+void Thread::AddSchedulingFlag(ThreadSchedFlags flag) {
+ const u32 old_state = scheduling_state;
+ pausing_state |= static_cast<u32>(flag);
+ const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
+ scheduling_state = base_scheduling | pausing_state;
+ kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
}
-bool Thread::YieldAndWaitForLoadBalancing() {
- auto& scheduler = kernel.GlobalScheduler();
- return scheduler.YieldThreadAndWaitForLoadBalancing(this);
+void Thread::RemoveSchedulingFlag(ThreadSchedFlags flag) {
+ const u32 old_state = scheduling_state;
+ pausing_state &= ~static_cast<u32>(flag);
+ const u32 base_scheduling = static_cast<u32>(GetSchedulingStatus());
+ scheduling_state = base_scheduling | pausing_state;
+ kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state);
}
void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) {
- const u32 old_flags = scheduling_state;
+ const u32 old_state = scheduling_state; // snapshot of the full state word, handed to the scheduler below
scheduling_state = (scheduling_state & static_cast<u32>(ThreadSchedMasks::HighMask)) |
static_cast<u32>(new_status);
- AdjustSchedulingOnStatus(old_flags);
+ kernel.GlobalScheduler().AdjustSchedulingOnStatus(this, old_state); // status-change handling now lives in GlobalScheduler
}
void Thread::SetCurrentPriority(u32 new_priority) {
const u32 old_priority = std::exchange(current_priority, new_priority);
- AdjustSchedulingOnPriority(old_priority);
+ kernel.GlobalScheduler().AdjustSchedulingOnPriority(this, old_priority); // priority-change handling now lives in GlobalScheduler
}
ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
+ SchedulerLock lock(kernel);
const auto HighestSetCore = [](u64 mask, u32 max_cores) {
for (s32 core = static_cast<s32>(max_cores - 1); core >= 0; core--) {
if (((mask >> core) & 1) != 0) {
@@ -443,111 +534,12 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) {
processor_id = ideal_core;
}
}
- AdjustSchedulingOnAffinity(old_affinity_mask, old_core);
+ kernel.GlobalScheduler().AdjustSchedulingOnAffinity(this, old_affinity_mask, old_core);
}
}
return RESULT_SUCCESS;
}
-void Thread::AdjustSchedulingOnStatus(u32 old_flags) {
- if (old_flags == scheduling_state) {
- return;
- }
-
- auto& scheduler = kernel.GlobalScheduler();
- if (static_cast<ThreadSchedStatus>(old_flags & static_cast<u32>(ThreadSchedMasks::LowMask)) ==
- ThreadSchedStatus::Runnable) {
- // In this case the thread was running, now it's pausing/exitting
- if (processor_id >= 0) {
- scheduler.Unschedule(current_priority, static_cast<u32>(processor_id), this);
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
- scheduler.Unsuggest(current_priority, core, this);
- }
- }
- } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) {
- // The thread is now set to running from being stopped
- if (processor_id >= 0) {
- scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
- scheduler.Suggest(current_priority, core, this);
- }
- }
- }
-
- scheduler.SetReselectionPending();
-}
-
-void Thread::AdjustSchedulingOnPriority(u32 old_priority) {
- if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) {
- return;
- }
- auto& scheduler = kernel.GlobalScheduler();
- if (processor_id >= 0) {
- scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this);
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
- scheduler.Unsuggest(old_priority, core, this);
- }
- }
-
- // Add thread to the new priority queues.
- Thread* current_thread = GetCurrentThread();
-
- if (processor_id >= 0) {
- if (current_thread == this) {
- scheduler.SchedulePrepend(current_priority, static_cast<u32>(processor_id), this);
- } else {
- scheduler.Schedule(current_priority, static_cast<u32>(processor_id), this);
- }
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (core != static_cast<u32>(processor_id) && ((affinity_mask >> core) & 1) != 0) {
- scheduler.Suggest(current_priority, core, this);
- }
- }
-
- scheduler.SetReselectionPending();
-}
-
-void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) {
- auto& scheduler = kernel.GlobalScheduler();
- if (GetSchedulingStatus() != ThreadSchedStatus::Runnable ||
- current_priority >= THREADPRIO_COUNT) {
- return;
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (((old_affinity_mask >> core) & 1) != 0) {
- if (core == static_cast<u32>(old_core)) {
- scheduler.Unschedule(current_priority, core, this);
- } else {
- scheduler.Unsuggest(current_priority, core, this);
- }
- }
- }
-
- for (u32 core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
- if (((affinity_mask >> core) & 1) != 0) {
- if (core == static_cast<u32>(processor_id)) {
- scheduler.Schedule(current_priority, core, this);
- } else {
- scheduler.Suggest(current_priority, core, this);
- }
- }
- }
-
- scheduler.SetReselectionPending();
-}
-
////////////////////////////////////////////////////////////////////////////////////////////////////
/**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 23fdef8a4..c0342c462 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -6,26 +6,47 @@
#include <functional>
#include <string>
+#include <utility>
#include <vector>
#include "common/common_types.h"
+#include "common/spin_lock.h"
#include "core/arm/arm_interface.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/synchronization_object.h"
#include "core/hle/result.h"
+namespace Common {
+class Fiber;
+}
+
+namespace Core {
+class ARM_Interface;
+class System;
+} // namespace Core
+
namespace Kernel {
+class GlobalScheduler;
class KernelCore;
class Process;
class Scheduler;
enum ThreadPriority : u32 {
- THREADPRIO_HIGHEST = 0, ///< Highest thread priority
- THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
- THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps
- THREADPRIO_LOWEST = 63, ///< Lowest thread priority
- THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities.
+ THREADPRIO_HIGHEST = 0, ///< Highest thread priority
+ THREADPRIO_MAX_CORE_MIGRATION = 2, ///< Highest priority for a core migration
+ THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
+ THREADPRIO_DEFAULT = 44, ///< Default thread priority for userland apps
+ THREADPRIO_LOWEST = 63, ///< Lowest thread priority
+ THREADPRIO_COUNT = 64, ///< Total number of possible thread priorities.
+};
+
+enum ThreadType : u32 { // Single-bit flags; Thread::IsHLEThread() and friends test them with '&'.
+ THREADTYPE_USER = 0x1,
+ THREADTYPE_KERNEL = 0x2,
+ THREADTYPE_HLE = 0x4, ///< Tested by Thread::IsHLEThread()
+ THREADTYPE_IDLE = 0x8, ///< Tested by Thread::IsIdleThread()
+ THREADTYPE_SUSPEND = 0x10, ///< Tested by Thread::IsSuspendThread()
};
enum ThreadProcessorId : s32 {
@@ -107,26 +128,45 @@ public:
using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
- using WakeupCallback =
- std::function<bool(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<SynchronizationObject> object, std::size_t index)>;
+ using HLECallback = std::function<bool(std::shared_ptr<Thread> thread)>;
+
+ /**
+ * Creates and returns a new thread. The new thread is immediately scheduled
+ * @param system The instance of the whole system
+ * @param name The friendly name desired for the thread
+ * @param entry_point The address at which the thread should start execution
+ * @param priority The thread's priority
+ * @param arg User data to pass to the thread
+ * @param processor_id The ID(s) of the processors on which the thread is desired to be run
+ * @param stack_top The address of the thread's stack top
+ * @param owner_process The parent process for the thread, if null, it's a kernel thread
+ * @return A shared pointer to the newly created thread
+ */
+ static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
+ std::string name, VAddr entry_point,
+ u32 priority, u64 arg, s32 processor_id,
+ VAddr stack_top, Process* owner_process);
/**
* Creates and returns a new thread. The new thread is immediately scheduled
- * @param kernel The kernel instance this thread will be created under.
+ * @param system The instance of the whole system
* @param name The friendly name desired for the thread
* @param entry_point The address at which the thread should start execution
* @param priority The thread's priority
* @param arg User data to pass to the thread
* @param processor_id The ID(s) of the processors on which the thread is desired to be run
* @param stack_top The address of the thread's stack top
- * @param owner_process The parent process for the thread
+ * @param owner_process The parent process for the thread, if null, it's a kernel thread
+ * @param thread_start_func The function where the host context will start.
+ * @param thread_start_parameter The parameter which will be passed to host context on init
* @return A shared pointer to the newly created thread
*/
- static ResultVal<std::shared_ptr<Thread>> Create(KernelCore& kernel, std::string name,
- VAddr entry_point, u32 priority, u64 arg,
- s32 processor_id, VAddr stack_top,
- Process& owner_process);
+ static ResultVal<std::shared_ptr<Thread>> Create(Core::System& system, ThreadType type_flags,
+ std::string name, VAddr entry_point,
+ u32 priority, u64 arg, s32 processor_id,
+ VAddr stack_top, Process* owner_process,
+ std::function<void(void*)>&& thread_start_func,
+ void* thread_start_parameter);
std::string GetName() const override {
return name;
@@ -181,7 +221,7 @@ public:
void UpdatePriority();
/// Changes the core that the thread is running or scheduled to run on.
- void ChangeCore(u32 core, u64 mask);
+ ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
/**
* Gets the thread's thread ID
@@ -194,6 +234,10 @@ public:
/// Resumes a thread from waiting
void ResumeFromWait();
+ void OnWakeUp();
+
+ ResultCode Start();
+
/// Cancels a waiting operation that this thread may or may not be within.
///
/// When the thread is within a waiting state, this will set the thread's
@@ -202,26 +246,19 @@ public:
///
void CancelWait();
- /**
- * Schedules an event to wake up the specified thread after the specified delay
- * @param nanoseconds The time this thread will be allowed to sleep for
- */
- void WakeAfterDelay(s64 nanoseconds);
+ void SetSynchronizationResults(SynchronizationObject* object, ResultCode result);
- /// Cancel any outstanding wakeup events for this thread
- void CancelWakeupTimer();
+ Core::ARM_Interface& ArmInterface();
- /**
- * Sets the result after the thread awakens (from svcWaitSynchronization)
- * @param result Value to set to the returned result
- */
- void SetWaitSynchronizationResult(ResultCode result);
+ const Core::ARM_Interface& ArmInterface() const;
- /**
- * Sets the output parameter value after the thread awakens (from svcWaitSynchronization)
- * @param output Value to set to the output parameter
- */
- void SetWaitSynchronizationOutput(s32 output);
+ SynchronizationObject* GetSignalingObject() const {
+ return signaling_object;
+ }
+
+ ResultCode GetSignalingResult() const {
+ return signaling_result;
+ }
/**
* Retrieves the index that this particular object occupies in the list of objects
@@ -269,11 +306,6 @@ public:
*/
VAddr GetCommandBufferAddress() const;
- /// Returns whether this thread is waiting on objects from a WaitSynchronization call.
- bool IsSleepingOnWait() const {
- return status == ThreadStatus::WaitSynch;
- }
-
ThreadContext32& GetContext32() {
return context_32;
}
@@ -290,6 +322,28 @@ public:
return context_64;
}
+ bool IsHLEThread() const {
+ return (type & THREADTYPE_HLE) != 0;
+ }
+
+ bool IsSuspendThread() const {
+ return (type & THREADTYPE_SUSPEND) != 0;
+ }
+
+ bool IsIdleThread() const {
+ return (type & THREADTYPE_IDLE) != 0;
+ }
+
+ bool WasRunning() const {
+ return was_running;
+ }
+
+ void SetWasRunning(bool value) {
+ was_running = value;
+ }
+
+ std::shared_ptr<Common::Fiber>& GetHostContext();
+
ThreadStatus GetStatus() const {
return status;
}
@@ -325,18 +379,18 @@ public:
}
const ThreadSynchronizationObjects& GetSynchronizationObjects() const {
- return wait_objects;
+ return *wait_objects;
}
- void SetSynchronizationObjects(ThreadSynchronizationObjects objects) {
- wait_objects = std::move(objects);
+ void SetSynchronizationObjects(ThreadSynchronizationObjects* objects) {
+ wait_objects = objects;
}
void ClearSynchronizationObjects() {
- for (const auto& waiting_object : wait_objects) {
+ for (const auto& waiting_object : *wait_objects) { // NOTE(review): raw pointer dereferenced unchecked; presumably SetSynchronizationObjects() always ran first — confirm
waiting_object->RemoveWaitingThread(SharedFrom(this));
}
- wait_objects.clear();
+ wait_objects->clear(); // empties the pointed-to vector; the pointer itself is kept
}
/// Determines whether all the objects this thread is waiting on are ready.
@@ -386,26 +440,35 @@ public:
arb_wait_address = address;
}
- bool HasWakeupCallback() const {
- return wakeup_callback != nullptr;
+ bool HasHLECallback() const {
+ return hle_callback != nullptr;
}
- void SetWakeupCallback(WakeupCallback callback) {
- wakeup_callback = std::move(callback);
+ void SetHLECallback(HLECallback callback) {
+ hle_callback = std::move(callback);
}
- void InvalidateWakeupCallback() {
- SetWakeupCallback(nullptr);
+ void SetHLETimeEvent(Handle time_event) {
+ hle_time_event = time_event;
}
- /**
- * Invokes the thread's wakeup callback.
- *
- * @pre A valid wakeup callback has been set. Violating this precondition
- * will cause an assertion to trigger.
- */
- bool InvokeWakeupCallback(ThreadWakeupReason reason, std::shared_ptr<Thread> thread,
- std::shared_ptr<SynchronizationObject> object, std::size_t index);
+ void SetHLESyncObject(SynchronizationObject* object) {
+ hle_object = object;
+ }
+
+ Handle GetHLETimeEvent() const {
+ return hle_time_event;
+ }
+
+ SynchronizationObject* GetHLESyncObject() const {
+ return hle_object;
+ }
+
+ void InvalidateHLECallback() {
+ SetHLECallback(nullptr);
+ }
+
+ bool InvokeHLECallback(std::shared_ptr<Thread> thread);
u32 GetIdealCore() const {
return ideal_core;
@@ -415,23 +478,19 @@ public:
return affinity_mask;
}
- ThreadActivity GetActivity() const {
- return activity;
- }
-
- void SetActivity(ThreadActivity value);
+ ResultCode SetActivity(ThreadActivity value);
/// Sleeps this thread for the given amount of nanoseconds.
- void Sleep(s64 nanoseconds);
+ ResultCode Sleep(s64 nanoseconds);
/// Yields this thread without rebalancing loads.
- bool YieldSimple();
+ std::pair<ResultCode, bool> YieldSimple();
/// Yields this thread and does a load rebalancing.
- bool YieldAndBalanceLoad();
+ std::pair<ResultCode, bool> YieldAndBalanceLoad();
/// Yields this thread and if the core is left idle, loads are rebalanced
- bool YieldAndWaitForLoadBalancing();
+ std::pair<ResultCode, bool> YieldAndWaitForLoadBalancing();
void IncrementYieldCount() {
yield_count++;
@@ -446,6 +505,10 @@ public:
static_cast<u32>(ThreadSchedMasks::LowMask));
}
+ bool IsRunnable() const { // Exact match on the whole state word: any extra bit (e.g. a pause flag) makes this false.
+ return scheduling_state == static_cast<u32>(ThreadSchedStatus::Runnable);
+ }
+
bool IsRunning() const {
return is_running;
}
@@ -466,17 +529,67 @@ public:
return global_handle;
}
+ bool IsWaitingForArbitration() const {
+ return waiting_for_arbitration;
+ }
+
+ void WaitForArbitration(bool set) {
+ waiting_for_arbitration = set;
+ }
+
+ bool IsWaitingSync() const {
+ return is_waiting_on_sync;
+ }
+
+ void SetWaitingSync(bool is_waiting) {
+ is_waiting_on_sync = is_waiting;
+ }
+
+ bool IsPendingTermination() const { // True if flagged via will_be_terminated, or if the scheduling status is already Exited.
+ return will_be_terminated || GetSchedulingStatus() == ThreadSchedStatus::Exited;
+ }
+
+ bool IsPaused() const {
+ return pausing_state != 0;
+ }
+
+ bool IsContinuousOnSVC() const {
+ return is_continuous_on_svc;
+ }
+
+ void SetContinuousOnSVC(bool is_continuous) {
+ is_continuous_on_svc = is_continuous;
+ }
+
+ bool IsPhantomMode() const {
+ return is_phantom_mode;
+ }
+
+ void SetPhantomMode(bool phantom) {
+ is_phantom_mode = phantom;
+ }
+
+ bool HasExited() const {
+ return has_exited;
+ }
+
private:
+ friend class GlobalScheduler;
+ friend class Scheduler;
+
void SetSchedulingStatus(ThreadSchedStatus new_status);
+ void AddSchedulingFlag(ThreadSchedFlags flag);
+ void RemoveSchedulingFlag(ThreadSchedFlags flag);
+
void SetCurrentPriority(u32 new_priority);
- ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask);
- void AdjustSchedulingOnStatus(u32 old_flags);
- void AdjustSchedulingOnPriority(u32 old_priority);
void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
+ Common::SpinLock context_guard{};
ThreadContext32 context_32{};
ThreadContext64 context_64{};
+ std::unique_ptr<Core::ARM_Interface> arm_interface{};
+ std::shared_ptr<Common::Fiber> host_context{};
u64 thread_id = 0;
@@ -485,6 +598,8 @@ private:
VAddr entry_point = 0;
VAddr stack_top = 0;
+ ThreadType type{}; ///< ThreadType flag bits; value-initialised so the Is*Thread() checks never read an indeterminate value
+
/// Nominal thread priority, as set by the emulated application.
/// The nominal priority is the thread priority without priority
/// inheritance taken into account.
@@ -509,7 +624,10 @@ private:
/// Objects that the thread is waiting on, in the same order as they were
/// passed to WaitSynchronization.
- ThreadSynchronizationObjects wait_objects;
+ ThreadSynchronizationObjects* wait_objects{}; ///< Raw pointer stored by SetSynchronizationObjects(); null until then
+
+ SynchronizationObject* signaling_object{}; ///< Returned by GetSignalingObject(); null until assigned
+ ResultCode signaling_result{RESULT_SUCCESS};
/// List of threads that are waiting for a mutex that is held by this thread.
MutexWaitingThreads wait_mutex_threads;
@@ -526,30 +644,39 @@ private:
/// If waiting for an AddressArbiter, this is the address being waited on.
VAddr arb_wait_address{0};
+ bool waiting_for_arbitration{};
/// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
Handle global_handle = 0;
- /// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
- /// was waiting via WaitSynchronization then the object will be the last object that became
- /// available. In case of a timeout, the object will be nullptr.
- WakeupCallback wakeup_callback;
+ /// Callback for HLE Events
+ HLECallback hle_callback;
+ Handle hle_time_event{}; ///< Set by SetHLETimeEvent(); zero until then (NOTE(review): presumably equals InvalidHandle — confirm)
+ SynchronizationObject* hle_object{}; ///< Set by SetHLESyncObject(); null until then
Scheduler* scheduler = nullptr;
u32 ideal_core{0xFFFFFFFF};
u64 affinity_mask{0x1};
- ThreadActivity activity = ThreadActivity::Normal;
-
s32 ideal_core_override = -1;
u64 affinity_mask_override = 0x1;
u32 affinity_override_count = 0;
u32 scheduling_state = 0;
+ u32 pausing_state = 0;
bool is_running = false;
+ bool is_waiting_on_sync = false;
bool is_sync_cancelled = false;
+ bool is_continuous_on_svc = false;
+
+ bool will_be_terminated = false;
+ bool is_phantom_mode = false;
+ bool has_exited = false;
+
+ bool was_running = false;
+
std::string name;
};
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
index 21b290468..941305e8e 100644
--- a/src/core/hle/kernel/time_manager.cpp
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -8,30 +8,37 @@
#include "core/core_timing_util.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/time_manager.h"
namespace Kernel {
-TimeManager::TimeManager(Core::System& system) : system{system} {
+TimeManager::TimeManager(Core::System& system_) : system{system_} {
time_manager_event_type = Core::Timing::CreateEvent(
"Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
+ SchedulerLock lock(system.Kernel()); // held for the rest of the callback, including the wake-up
Handle proper_handle = static_cast<Handle>(thread_handle);
+ if (cancelled_events[proper_handle]) { // skip events that UnscheduleTimeEvent() marked as cancelled
+ return;
+ }
std::shared_ptr<Thread> thread =
this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
- thread->ResumeFromWait();
+ thread->OnWakeUp(); // NOTE(review): thread is dereferenced unchecked; presumably the handle is always still registered — confirm
});
}
void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) {
if (nanoseconds > 0) {
ASSERT(timetask);
event_handle = timetask->GetGlobalHandle();
- const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
- system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle);
+ ASSERT(timetask->GetStatus() != ThreadStatus::Ready);
+ ASSERT(timetask->GetStatus() != ThreadStatus::WaitMutex);
+ cancelled_events[event_handle] = false; // re-arm before scheduling; the map is never keyed by InvalidHandle
+ system.CoreTiming().ScheduleEvent(nanoseconds, time_manager_event_type, event_handle); // delay is passed in nanoseconds
} else {
event_handle = InvalidHandle;
}
}
void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
@@ -39,6 +46,12 @@ void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
return;
}
system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle);
+ cancelled_events[event_handle] = true;
+}
+
+void TimeManager::CancelTimeEvent(Thread* time_task) {
+ // A thread's time event is keyed by that thread's global handle.
+ UnscheduleTimeEvent(time_task->GetGlobalHandle());
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
index eaec486d1..307a18765 100644
--- a/src/core/hle/kernel/time_manager.h
+++ b/src/core/hle/kernel/time_manager.h
@@ -5,6 +5,7 @@
#pragma once
#include <memory>
+#include <unordered_map>
#include "core/hle/kernel/object.h"
@@ -35,9 +36,12 @@ public:
/// Unschedule an existing time event
void UnscheduleTimeEvent(Handle event_handle);
+ void CancelTimeEvent(Thread* time_task);
+
private:
Core::System& system;
std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
+ std::unordered_map<Handle, bool> cancelled_events;
};
} // namespace Kernel
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 630a8b048..94d8c1fc6 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
return static_cast<u32>(std::min(size, max_jpeg_image_size));
}
+class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> { // Stub: every command is registered with a null handler.
+public:
+ explicit IManagerForSystemService(Common::UUID user_id) // user_id is not used by this stub
+ : ServiceFramework("IManagerForSystemService") {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "CheckAvailability"},
+ {1, nullptr, "GetAccountId"},
+ {2, nullptr, "EnsureIdTokenCacheAsync"},
+ {3, nullptr, "LoadIdTokenCache"},
+ {100, nullptr, "SetSystemProgramIdentification"},
+ {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
+ {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
+ {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
+ {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
+ {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
+ {120, nullptr, "GetNintendoAccountId"},
+ {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
+ {130, nullptr, "GetNintendoAccountUserResourceCache"},
+ {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
+ {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
+ {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
+ {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
+ {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
+ {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
+ {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
+ {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
+ {150, nullptr, "CreateAuthorizationRequest"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+// 3.0.0+
+class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> { // Stub: every command is registered with a null handler.
+public:
+ explicit IFloatingRegistrationRequest(Common::UUID user_id) // user_id is not used by this stub
+ : ServiceFramework("IFloatingRegistrationRequest") {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "GetSessionId"},
+ {12, nullptr, "GetAccountId"},
+ {13, nullptr, "GetLinkedNintendoAccountId"},
+ {14, nullptr, "GetNickname"},
+ {15, nullptr, "GetProfileImage"},
+ {21, nullptr, "LoadIdTokenCache"},
+ {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync
+ {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync
+ {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+
+ {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+
+ {110, nullptr, "SetSystemProgramIdentification"},
+ {111, nullptr, "EnsureIdTokenCacheAsync"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class IAdministrator final : public ServiceFramework<IAdministrator> { // Stub: every command is registered with a null handler.
+public:
+ explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") { // user_id is not used by this stub
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "CheckAvailability"},
+ {1, nullptr, "GetAccountId"},
+ {2, nullptr, "EnsureIdTokenCacheAsync"},
+ {3, nullptr, "LoadIdTokenCache"},
+ {100, nullptr, "SetSystemProgramIdentification"},
+ {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
+ {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
+ {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
+ {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
+ {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
+ {120, nullptr, "GetNintendoAccountId"},
+ {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
+ {130, nullptr, "GetNintendoAccountUserResourceCache"},
+ {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
+ {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
+ {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
+ {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
+ {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
+ {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
+ {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
+ {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
+ {150, nullptr, "CreateAuthorizationRequest"},
+ {200, nullptr, "IsRegistered"},
+ {201, nullptr, "RegisterAsync"},
+ {202, nullptr, "UnregisterAsync"},
+ {203, nullptr, "DeleteRegistrationInfoLocally"},
+ {220, nullptr, "SynchronizeProfileAsync"},
+ {221, nullptr, "UploadProfileAsync"},
+ {222, nullptr, "SynchronizeProfileAsyncIfSecondsElapsed"},
+ {250, nullptr, "IsLinkedWithNintendoAccount"},
+ {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"},
+ {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"},
+ {255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"},
+ {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"},
+ {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
+ {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
+ {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"},
+ {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+
+ {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+
+ {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+
+ {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+
+ {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+
+ {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+
+ {997, nullptr, "DebugUnlinkNintendoAccountAsync"},
+ {998, nullptr, "DebugSetAvailabilityErrorDetail"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> { // Stub: every command is registered with a null handler.
+public:
+ explicit IAuthorizationRequest(Common::UUID user_id) // user_id is not used by this stub
+ : ServiceFramework("IAuthorizationRequest") {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "GetSessionId"},
+ {10, nullptr, "InvokeWithoutInteractionAsync"},
+ {19, nullptr, "IsAuthorized"},
+ {20, nullptr, "GetAuthorizationCode"},
+ {21, nullptr, "GetIdToken"},
+ {22, nullptr, "GetState"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> { // Stub: every command is registered with a null handler.
+public:
+ explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") { // user_id is not used by this stub
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "PrepareAsync"},
+ {1, nullptr, "GetRequest"},
+ {2, nullptr, "ApplyResponse"},
+ {3, nullptr, "ApplyResponseAsync"},
+ {10, nullptr, "Suspend"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+// 3.0.0+
+class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> { // Stub: every command is registered with a null handler.
+public:
+ explicit IOAuthProcedureForExternalNsa(Common::UUID user_id) // user_id is not used by this stub
+ : ServiceFramework("IOAuthProcedureForExternalNsa") {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "PrepareAsync"},
+ {1, nullptr, "GetRequest"},
+ {2, nullptr, "ApplyResponse"},
+ {3, nullptr, "ApplyResponseAsync"},
+ {10, nullptr, "Suspend"},
+ {100, nullptr, "GetAccountId"},
+ {101, nullptr, "GetLinkedNintendoAccountId"},
+ {102, nullptr, "GetNickname"},
+ {103, nullptr, "GetProfileImage"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class IOAuthProcedureForNintendoAccountLinkage final
+ : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> { // Stub: every command is registered with a null handler.
+public:
+ explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id) // user_id is not used by this stub
+ : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") {
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "PrepareAsync"},
+ {1, nullptr, "GetRequest"},
+ {2, nullptr, "ApplyResponse"},
+ {3, nullptr, "ApplyResponseAsync"},
+ {10, nullptr, "Suspend"},
+ {100, nullptr, "GetRequestWithTheme"},
+ {101, nullptr, "IsNetworkServiceAccountReplaced"},
+ {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class INotifier final : public ServiceFramework<INotifier> { // Stub: its single command is registered with a null handler.
+public:
+ explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") { // user_id is not used by this stub
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "GetSystemEvent"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
class IProfileCommon : public ServiceFramework<IProfileCommon> {
public:
explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
@@ -226,6 +438,54 @@ public:
: IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
};
+class IAsyncContext final : public ServiceFramework<IAsyncContext> { // Stub: every command is registered with a null handler.
+public:
+ explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") { // user_id is not used by this stub
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "GetSystemEvent"},
+ {1, nullptr, "Cancel"},
+ {2, nullptr, "HasDone"},
+ {3, nullptr, "GetResult"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class ISessionObject final : public ServiceFramework<ISessionObject> { // Stub: its single command is registered with a null handler.
+public:
+ explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") { // user_id is not used by this stub
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {999, nullptr, "Dummy"},
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
+class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> { // Stub: every command is registered with a null handler.
+public:
+ explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") { // UUID parameter is unnamed and ignored
+ // clang-format off
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "GetSessionId"},
+ {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew)
+ {12, nullptr, "GetAccountId"},
+ {13, nullptr, "GetLinkedNintendoAccountId"},
+ {14, nullptr, "GetNickname"},
+ {15, nullptr, "GetProfileImage"},
+ {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+
+ };
+ // clang-format on
+
+ RegisterHandlers(functions);
+ }
+};
+
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
explicit IManagerForApplication(Common::UUID user_id)
@@ -265,6 +525,87 @@ private:
Common::UUID user_id;
};
+// 6.0.0+
+/// Service interface registered under the name "IAsyncNetworkServiceLicenseKindContext".
+/// Every command in the table below has a null handler.
+class IAsyncNetworkServiceLicenseKindContext final
+    : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> {
+public:
+    /// The UUID argument is not read by this constructor, so the parameter is left unnamed
+    /// to avoid an unused-parameter warning.
+    explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID)
+        : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "GetSystemEvent"},
+            {1, nullptr, "Cancel"},
+            {2, nullptr, "HasDone"},
+            {3, nullptr, "GetResult"},
+            {4, nullptr, "GetNetworkServiceLicenseKind"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+};
+
+// 8.0.0+
+/// Service interface registered under the name "IOAuthProcedureForUserRegistration".
+/// Every command in the table below has a null handler.
+class IOAuthProcedureForUserRegistration final
+    : public ServiceFramework<IOAuthProcedureForUserRegistration> {
+public:
+    /// The UUID argument is not read by this constructor, so the parameter is left unnamed
+    /// to avoid an unused-parameter warning.
+    explicit IOAuthProcedureForUserRegistration(Common::UUID)
+        : ServiceFramework("IOAuthProcedureForUserRegistration") {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "PrepareAsync"},
+            {1, nullptr, "GetRequest"},
+            {2, nullptr, "ApplyResponse"},
+            {3, nullptr, "ApplyResponseAsync"},
+            {10, nullptr, "Suspend"},
+            {100, nullptr, "GetAccountId"},
+            {101, nullptr, "GetLinkedNintendoAccountId"},
+            {102, nullptr, "GetNickname"},
+            {103, nullptr, "GetProfileImage"},
+            {110, nullptr, "RegisterUserAsync"},
+            {111, nullptr, "GetUid"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+};
+
+/// Service interface registered under the name "dauth:o". All commands listed in the table
+/// are registered with a null handler.
+class DAUTH_O final : public ServiceFramework<DAUTH_O> {
+public:
+    /// Takes a UUID (unnamed because it is not read) and registers the command table.
+    explicit DAUTH_O(Common::UUID) : ServiceFramework{"dauth:o"} {
+        // clang-format off
+        static const FunctionInfo command_table[] = {
+            {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData
+            {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+
+            {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+
+            {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+
+            {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+
+            {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+
+        };
+        // clang-format on
+
+        RegisterHandlers(command_table);
+    }
+};
+
+// 6.0.0+
+/// Service interface registered under the name "IAsyncResult". Every command in the table
+/// below has a null handler.
+class IAsyncResult final : public ServiceFramework<IAsyncResult> {
+public:
+    /// The UUID argument is not read by this constructor, so the parameter is left unnamed
+    /// to avoid an unused-parameter warning.
+    explicit IAsyncResult(Common::UUID) : ServiceFramework("IAsyncResult") {
+        // clang-format off
+        static const FunctionInfo functions[] = {
+            {0, nullptr, "GetResult"},
+            {1, nullptr, "Cancel"},
+            {2, nullptr, "IsAvailable"},
+            {3, nullptr, "GetSystemEvent"},
+        };
+        // clang-format on
+
+        RegisterHandlers(functions);
+    }
+};
+
void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/acc/acc_aa.cpp b/src/core/hle/service/acc/acc_aa.cpp
index 3bac6bcd1..51f119b12 100644
--- a/src/core/hle/service/acc/acc_aa.cpp
+++ b/src/core/hle/service/acc/acc_aa.cpp
@@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{0, nullptr, "EnsureCacheAsync"},
{1, nullptr, "LoadCache"},
{2, nullptr, "GetDeviceAccountId"},
- {50, nullptr, "RegisterNotificationTokenAsync"},
- {51, nullptr, "UnregisterNotificationTokenAsync"},
+ {50, nullptr, "RegisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
+ {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
};
RegisterHandlers(functions);
}
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 2eefc6df5..85620bde3 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
{4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
{5, &ACC_SU::GetProfile, "GetProfile"},
- {6, nullptr, "GetProfileDigest"},
+ {6, nullptr, "GetProfileDigest"}, // 3.0.0+
{50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
- {60, nullptr, "ListOpenContextStoredUsers"},
- {99, nullptr, "DebugActivateOpenContextRetention"},
+ {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
+ {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
{100, nullptr, "GetUserRegistrationNotifier"},
{101, nullptr, "GetUserStateChangeNotifier"},
{102, nullptr, "GetBaasAccountManagerForSystemService"},
{103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
{104, nullptr, "GetProfileUpdateNotifier"},
- {105, nullptr, "CheckNetworkServiceAvailabilityAsync"},
- {106, nullptr, "GetProfileSyncNotifier"},
+ {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
+ {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
{110, nullptr, "StoreSaveDataThumbnail"},
{111, nullptr, "ClearSaveDataThumbnail"},
{112, nullptr, "LoadSaveDataThumbnail"},
- {113, nullptr, "GetSaveDataThumbnailExistence"},
- {120, nullptr, "ListOpenUsersInApplication"},
- {130, nullptr, "ActivateOpenContextRetention"},
- {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"},
- {150, nullptr, "AuthenticateApplicationAsync"},
- {190, nullptr, "GetUserLastOpenedApplication"},
- {191, nullptr, "ActivateOpenContextHolder"},
+ {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
+ {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
+ {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
+ {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
+ {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
+ {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
+ {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
{200, nullptr, "BeginUserRegistration"},
{201, nullptr, "CompleteUserRegistration"},
{202, nullptr, "CancelUserRegistration"},
@@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{204, nullptr, "SetUserPosition"},
{205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
{206, nullptr, "CompleteUserRegistrationForcibly"},
- {210, nullptr, "CreateFloatingRegistrationRequest"},
- {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"},
- {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"},
+ {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+
+ {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
+ {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
{230, nullptr, "AuthenticateServiceAsync"},
{250, nullptr, "GetBaasAccountAdministrator"},
{290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
- {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"},
+ {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+
{299, nullptr, "SuspendBackgroundDaemon"},
- {997, nullptr, "DebugInvalidateTokenCacheForUser"},
+ {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
{998, nullptr, "DebugSetUserStateClose"},
{999, nullptr, "DebugSetUserStateOpen"},
};
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index fb4e7e772..49f6e20f1 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
{4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
{5, &ACC_U0::GetProfile, "GetProfile"},
- {6, nullptr, "GetProfileDigest"},
+ {6, nullptr, "GetProfileDigest"}, // 3.0.0+
{50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
- {60, nullptr, "ListOpenContextStoredUsers"},
- {99, nullptr, "DebugActivateOpenContextRetention"},
+ {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
+ {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
{100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
{101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
{102, nullptr, "AuthenticateApplicationAsync"},
- {103, nullptr, "CheckNetworkServiceAvailabilityAsync"},
+ {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
{110, nullptr, "StoreSaveDataThumbnail"},
{111, nullptr, "ClearSaveDataThumbnail"},
{120, nullptr, "CreateGuestLoginRequest"},
- {130, nullptr, "LoadOpenContext"},
- {131, nullptr, "ListOpenContextStoredUsers"},
- {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"},
- {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"},
- {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"},
+ {130, nullptr, "LoadOpenContext"}, // 5.0.0+
+ {131, nullptr, "ListOpenContextStoredUsers"}, // 6.0.0+
+ {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+
+ {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
+ {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+
};
// clang-format on
diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp
index 9f29cdc82..f47004f84 100644
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
{4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
{5, &ACC_U1::GetProfile, "GetProfile"},
- {6, nullptr, "GetProfileDigest"},
+ {6, nullptr, "GetProfileDigest"}, // 3.0.0+
{50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
{51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
- {60, nullptr, "ListOpenContextStoredUsers"},
- {99, nullptr, "DebugActivateOpenContextRetention"},
+ {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
+ {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
{100, nullptr, "GetUserRegistrationNotifier"},
{101, nullptr, "GetUserStateChangeNotifier"},
{102, nullptr, "GetBaasAccountManagerForSystemService"},
- {103, nullptr, "GetProfileUpdateNotifier"},
- {104, nullptr, "CheckNetworkServiceAvailabilityAsync"},
- {105, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
- {106, nullptr, "GetProfileSyncNotifier"},
+ {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
+ {104, nullptr, "GetProfileUpdateNotifier"},
+ {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
+ {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
{110, nullptr, "StoreSaveDataThumbnail"},
{111, nullptr, "ClearSaveDataThumbnail"},
{112, nullptr, "LoadSaveDataThumbnail"},
- {113, nullptr, "GetSaveDataThumbnailExistence"},
- {130, nullptr, "ActivateOpenContextRetention"},
- {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"},
- {150, nullptr, "AuthenticateApplicationAsync"},
- {190, nullptr, "GetUserLastOpenedApplication"},
- {191, nullptr, "ActivateOpenContextHolder"},
- {997, nullptr, "DebugInvalidateTokenCacheForUser"},
+ {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
+ {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
+ {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
+ {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
+ {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
+ {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
+ {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
+ {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
{998, nullptr, "DebugSetUserStateClose"},
{999, nullptr, "DebugSetUserStateOpen"},
};
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 4df74c4f9..20f366635 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -68,6 +68,7 @@ IWindowController::IWindowController(Core::System& system_)
static const FunctionInfo functions[] = {
{0, nullptr, "CreateWindow"},
{1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"},
+ {2, nullptr, "GetAppletResourceUserIdOfCallerApplet"},
{10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"},
{11, nullptr, "ReleaseForegroundRights"},
{12, nullptr, "RejectToChangeIntoBackground"},
@@ -189,8 +190,8 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
{5, nullptr, "GetLastForegroundCaptureImageEx"},
{6, nullptr, "GetLastApplicationCaptureImageEx"},
{7, nullptr, "GetCallerAppletCaptureImageEx"},
- {8, nullptr, "TakeScreenShotOfOwnLayer"}, // 2.0.0+
- {9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+
+ {8, nullptr, "TakeScreenShotOfOwnLayer"},
+ {9, nullptr, "CopyBetweenCaptureBuffers"},
{10, nullptr, "AcquireLastApplicationCaptureBuffer"},
{11, nullptr, "ReleaseLastApplicationCaptureBuffer"},
{12, nullptr, "AcquireLastForegroundCaptureBuffer"},
@@ -200,17 +201,14 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
{16, nullptr, "AcquireLastApplicationCaptureBufferEx"},
{17, nullptr, "AcquireLastForegroundCaptureBufferEx"},
{18, nullptr, "AcquireCallerAppletCaptureBufferEx"},
- // 2.0.0+
{20, nullptr, "ClearCaptureBuffer"},
{21, nullptr, "ClearAppletTransitionBuffer"},
- // 4.0.0+
{22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"},
{23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"},
{24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"},
{25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"},
{26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"},
{27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"},
- // 6.0.0+
{28, nullptr, "TakeScreenShotOfOwnLayerEx"},
};
// clang-format on
@@ -225,7 +223,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
static const FunctionInfo functions[] = {
{0, nullptr, "NotifyMessageToHomeMenuForDebug"},
{1, nullptr, "OpenMainApplication"},
- {10, nullptr, "EmulateButtonEvent"},
+ {10, nullptr, "PerformSystemButtonPressing"},
{20, nullptr, "InvalidateTransitionLayer"},
{30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
{40, nullptr, "GetAppletResourceUsageInfo"},
@@ -267,7 +265,7 @@ ISelfController::ISelfController(Core::System& system,
{16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"},
{17, nullptr, "SetControllerFirmwareUpdateSection"},
{18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"},
- {19, &ISelfController::SetScreenShotImageOrientation, "SetScreenShotImageOrientation"},
+ {19, &ISelfController::SetAlbumImageOrientation, "SetAlbumImageOrientation"},
{20, nullptr, "SetDesirableKeyboardLayout"},
{40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"},
{41, nullptr, "IsSystemBufferSharingEnabled"},
@@ -443,7 +441,7 @@ void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext&
rb.Push(RESULT_SUCCESS);
}
-void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) {
+void ISelfController::SetAlbumImageOrientation(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_AM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
@@ -607,6 +605,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,
{20, nullptr, "PushToGeneralChannel"},
{30, nullptr, "GetHomeButtonReaderLockAccessor"},
{31, nullptr, "GetReaderLockAccessorEx"},
+ {32, nullptr, "GetWriterLockAccessorEx"},
{40, nullptr, "GetCradleFwVersion"},
{50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"},
{51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"},
@@ -1132,6 +1131,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{24, nullptr, "GetLaunchStorageInfoForDebug"},
{25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"},
{26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"},
+ {27, nullptr, "CreateCacheStorage"},
{30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"},
{31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"},
{32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"},
@@ -1157,6 +1157,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
{120, nullptr, "ExecuteProgram"},
{121, nullptr, "ClearUserChannel"},
{122, nullptr, "UnpopToUserChannel"},
+ {123, nullptr, "GetPreviousProgramIndex"},
+ {124, nullptr, "EnableApplicationAllThreadDumpOnCrash"},
{130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"},
{140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
{141, nullptr, "TryPopFromFriendInvitationStorageChannel"},
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 469f7f814..2f69466ec 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,7 +138,7 @@ private:
void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
- void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx);
+ void SetAlbumImageOrientation(Kernel::HLERequestContext& ctx);
void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 54e63c138..fbe3686ae 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
config.sub_text.size());
params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
config.guide_text.size());
- params.initial_text = initial_text;
+ params.initial_text = std::move(initial_text);
params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
params.password = static_cast<bool>(config.is_password);
params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
@@ -60,7 +60,7 @@ void SoftwareKeyboard::Initialize() {
std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig));
const auto work_buffer_storage = broker.PopNormalDataToApplet();
- ASSERT(work_buffer_storage != nullptr);
+ ASSERT_OR_EXECUTE(work_buffer_storage != nullptr, { return; });
const auto& work_buffer = work_buffer_storage->GetData();
if (config.initial_string_size == 0)
@@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {
const auto parameters = ConvertToFrontendParameters(config, initial_text);
- frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); },
+ frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },
parameters);
}
diff --git a/src/core/hle/service/am/spsm.cpp b/src/core/hle/service/am/spsm.cpp
index 003ee8667..f27729ce7 100644
--- a/src/core/hle/service/am/spsm.cpp
+++ b/src/core/hle/service/am/spsm.cpp
@@ -10,17 +10,17 @@ SPSM::SPSM() : ServiceFramework{"spsm"} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "GetState"},
- {1, nullptr, "SleepSystemAndWaitAwake"},
- {2, nullptr, "Unknown1"},
- {3, nullptr, "Unknown2"},
+ {1, nullptr, "EnterSleep"},
+ {2, nullptr, "GetLastWakeReason"},
+ {3, nullptr, "Shutdown"},
{4, nullptr, "GetNotificationMessageEventHandle"},
- {5, nullptr, "Unknown3"},
- {6, nullptr, "Unknown4"},
- {7, nullptr, "Unknown5"},
+ {5, nullptr, "ReceiveNotificationMessage"},
+ {6, nullptr, "AnalyzeLogForLastSleepWakeSequence"},
+ {7, nullptr, "ResetEventLog"},
{8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"},
{9, nullptr, "ChangeHomeButtonLongPressingTime"},
- {10, nullptr, "Unknown6"},
- {11, nullptr, "Unknown7"},
+ {10, nullptr, "PutErrorState"},
+ {11, nullptr, "InvalidateCurrentHomeButtonPressing"},
};
// clang-format on
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 4227a4adf..8e79f707b 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -60,6 +60,7 @@ AOC_U::AOC_U(Core::System& system)
{6, nullptr, "PrepareAddOnContentByApplicationId"},
{7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
{8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
+ {9, nullptr, "GetAddOnContentLostErrorCode"},
{100, nullptr, "CreateEcPurchasedEventManager"},
{101, nullptr, "CreatePermanentEcPurchasedEventManager"},
};
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp
index 5febe8fc1..d29e78d7e 100644
--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -4,8 +4,8 @@
#include <fmt/ostream.h>
#include <httplib.h>
-#include <json.hpp>
#include <mbedtls/sha256.h>
+#include <nlohmann/json.hpp>
#include "common/hex_util.h"
#include "common/logging/backend.h"
#include "common/logging/log.h"
diff --git a/src/core/hle/service/bcat/bcat.cpp b/src/core/hle/service/bcat/bcat.cpp
index 8bb2528c9..b31766212 100644
--- a/src/core/hle/service/bcat/bcat.cpp
+++ b/src/core/hle/service/bcat/bcat.cpp
@@ -14,6 +14,8 @@ BCAT::BCAT(Core::System& system, std::shared_ptr<Module> module,
{0, &BCAT::CreateBcatService, "CreateBcatService"},
{1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"},
{2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"},
+ {3, nullptr, "CreateDeliveryCacheProgressService"},
+ {4, nullptr, "CreateDeliveryCacheProgressServiceWithApplicationId"},
};
// clang-format on
RegisterHandlers(functions);
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp
index 34aba7a27..603b64d4f 100644
--- a/src/core/hle/service/bcat/module.cpp
+++ b/src/core/hle/service/bcat/module.cpp
@@ -143,10 +143,13 @@ public:
{20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
{20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
{30100, &IBcatService::SetPassphrase, "SetPassphrase"},
+ {30101, nullptr, "Unknown"},
+ {30102, nullptr, "Unknown2"},
{30200, nullptr, "RegisterBackgroundDeliveryTask"},
{30201, nullptr, "UnregisterBackgroundDeliveryTask"},
{30202, nullptr, "BlockDeliveryTask"},
{30203, nullptr, "UnblockDeliveryTask"},
+ {30210, nullptr, "SetDeliveryTaskTimer"},
{30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},
{90100, nullptr, "EnumerateBackgroundDeliveryTask"},
{90200, nullptr, "GetDeliveryList"},
diff --git a/src/core/hle/service/bpc/bpc.cpp b/src/core/hle/service/bpc/bpc.cpp
index 1c1ecdb60..fac6b2f9c 100644
--- a/src/core/hle/service/bpc/bpc.cpp
+++ b/src/core/hle/service/bpc/bpc.cpp
@@ -23,9 +23,14 @@ public:
{5, nullptr, "GetBoardPowerControlEvent"},
{6, nullptr, "GetSleepButtonState"},
{7, nullptr, "GetPowerEvent"},
- {8, nullptr, "Unknown1"},
- {9, nullptr, "Unknown2"},
- {10, nullptr, "Unknown3"},
+ {8, nullptr, "CreateWakeupTimer"},
+ {9, nullptr, "CancelWakeupTimer"},
+ {10, nullptr, "EnableWakeupTimerOnDevice"},
+ {11, nullptr, "CreateWakeupTimerEx"},
+ {12, nullptr, "GetLastEnabledWakeupTimerType"},
+ {13, nullptr, "CleanAllWakeupTimers"},
+ {14, nullptr, "Unknown"},
+ {15, nullptr, "Unknown2"},
};
// clang-format on
@@ -38,10 +43,11 @@ public:
explicit BPC_R() : ServiceFramework{"bpc:r"} {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "GetExternalRtcValue"},
- {1, nullptr, "SetExternalRtcValue"},
- {2, nullptr, "ReadExternalRtcResetFlag"},
- {3, nullptr, "ClearExternalRtcResetFlag"},
+ {0, nullptr, "GetRtcTime"},
+ {1, nullptr, "SetRtcTime"},
+ {2, nullptr, "GetRtcResetDetected"},
+ {3, nullptr, "ClearRtcResetDetected"},
+ {4, nullptr, "SetUpRtcResetOnShutdown"},
};
// clang-format on
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 40a06c9fd..f311afa2f 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -58,102 +58,103 @@ public:
{1, nullptr, "InitializeBluetooth"},
{2, nullptr, "EnableBluetooth"},
{3, nullptr, "DisableBluetooth"},
- {4, nullptr, "CleanupBluetooth"},
+ {4, nullptr, "FinalizeBluetooth"},
{5, nullptr, "GetAdapterProperties"},
{6, nullptr, "GetAdapterProperty"},
{7, nullptr, "SetAdapterProperty"},
- {8, nullptr, "StartDiscovery"},
- {9, nullptr, "CancelDiscovery"},
+ {8, nullptr, "StartInquiry"},
+ {9, nullptr, "StopInquiry"},
{10, nullptr, "CreateBond"},
{11, nullptr, "RemoveBond"},
{12, nullptr, "CancelBond"},
- {13, nullptr, "PinReply"},
- {14, nullptr, "SspReply"},
+ {13, nullptr, "RespondToPinRequest"},
+ {14, nullptr, "RespondToSspRequest"},
{15, nullptr, "GetEventInfo"},
{16, nullptr, "InitializeHid"},
- {17, nullptr, "HidConnect"},
- {18, nullptr, "HidDisconnect"},
- {19, nullptr, "HidSendData"},
- {20, nullptr, "HidSendData2"},
- {21, nullptr, "HidSetReport"},
- {22, nullptr, "HidGetReport"},
- {23, nullptr, "HidWakeController"},
- {24, nullptr, "HidAddPairedDevice"},
- {25, nullptr, "HidGetPairedDevice"},
- {26, nullptr, "CleanupHid"},
- {27, nullptr, "HidGetEventInfo"},
- {28, nullptr, "ExtSetTsi"},
- {29, nullptr, "ExtSetBurstMode"},
- {30, nullptr, "ExtSetZeroRetran"},
- {31, nullptr, "ExtSetMcMode"},
- {32, nullptr, "ExtStartLlrMode"},
- {33, nullptr, "ExtExitLlrMode"},
- {34, nullptr, "ExtSetRadio"},
- {35, nullptr, "ExtSetVisibility"},
- {36, nullptr, "ExtSetTbfcScan"},
+ {17, nullptr, "OpenHidConnection"},
+ {18, nullptr, "CloseHidConnection"},
+ {19, nullptr, "WriteHidData"},
+ {20, nullptr, "WriteHidData2"},
+ {21, nullptr, "SetHidReport"},
+ {22, nullptr, "GetHidReport"},
+ {23, nullptr, "TriggerConnection"},
+ {24, nullptr, "AddPairedDeviceInfo"},
+ {25, nullptr, "GetPairedDeviceInfo"},
+ {26, nullptr, "FinalizeHid"},
+ {27, nullptr, "GetHidEventInfo"},
+ {28, nullptr, "SetTsi"},
+ {29, nullptr, "EnableBurstMode"},
+ {30, nullptr, "SetZeroRetransmission"},
+ {31, nullptr, "EnableMcMode"},
+ {32, nullptr, "EnableLlrScan"},
+ {33, nullptr, "DisableLlrScan"},
+ {34, nullptr, "EnableRadio"},
+ {35, nullptr, "SetVisibility"},
+ {36, nullptr, "EnableTbfcScan"},
{37, nullptr, "RegisterHidReportEvent"},
- {38, nullptr, "HidGetReportEventInfo"},
+ {38, nullptr, "GetHidReportEventInfo"},
{39, nullptr, "GetLatestPlr"},
- {40, nullptr, "ExtGetPendingConnections"},
+ {40, nullptr, "GetPendingConnections"},
{41, nullptr, "GetChannelMap"},
- {42, nullptr, "EnableBluetoothBoostSetting"},
- {43, nullptr, "IsBluetoothBoostSettingEnabled"},
- {44, nullptr, "EnableBluetoothAfhSetting"},
- {45, nullptr, "IsBluetoothAfhSettingEnabled"},
- {46, nullptr, "InitializeBluetoothLe"},
- {47, nullptr, "EnableBluetoothLe"},
- {48, nullptr, "DisableBluetoothLe"},
- {49, nullptr, "CleanupBluetoothLe"},
- {50, nullptr, "SetLeVisibility"},
- {51, nullptr, "SetLeConnectionParameter"},
- {52, nullptr, "SetLeDefaultConnectionParameter"},
- {53, nullptr, "SetLeAdvertiseData"},
- {54, nullptr, "SetLeAdvertiseParameter"},
- {55, nullptr, "StartLeScan"},
- {56, nullptr, "StopLeScan"},
- {57, nullptr, "AddLeScanFilterCondition"},
- {58, nullptr, "DeleteLeScanFilterCondition"},
- {59, nullptr, "DeleteLeScanFilter"},
- {60, nullptr, "ClearLeScanFilters"},
- {61, nullptr, "EnableLeScanFilter"},
- {62, nullptr, "RegisterLeClient"},
- {63, nullptr, "UnregisterLeClient"},
- {64, nullptr, "UnregisterLeClientAll"},
- {65, nullptr, "LeClientConnect"},
- {66, nullptr, "LeClientCancelConnection"},
- {67, nullptr, "LeClientDisconnect"},
- {68, nullptr, "LeClientGetAttributes"},
- {69, nullptr, "LeClientDiscoverService"},
- {70, nullptr, "LeClientConfigureMtu"},
- {71, nullptr, "RegisterLeServer"},
- {72, nullptr, "UnregisterLeServer"},
- {73, nullptr, "LeServerConnect"},
- {74, nullptr, "LeServerDisconnect"},
- {75, nullptr, "CreateLeService"},
- {76, nullptr, "StartLeService"},
- {77, nullptr, "AddLeCharacteristic"},
- {78, nullptr, "AddLeDescriptor"},
- {79, nullptr, "GetLeCoreEventInfo"},
- {80, nullptr, "LeGetFirstCharacteristic"},
- {81, nullptr, "LeGetNextCharacteristic"},
- {82, nullptr, "LeGetFirstDescriptor"},
- {83, nullptr, "LeGetNextDescriptor"},
- {84, nullptr, "RegisterLeCoreDataPath"},
- {85, nullptr, "UnregisterLeCoreDataPath"},
- {86, nullptr, "RegisterLeHidDataPath"},
- {87, nullptr, "UnregisterLeHidDataPath"},
- {88, nullptr, "RegisterLeDataPath"},
- {89, nullptr, "UnregisterLeDataPath"},
- {90, nullptr, "LeClientReadCharacteristic"},
- {91, nullptr, "LeClientReadDescriptor"},
- {92, nullptr, "LeClientWriteCharacteristic"},
- {93, nullptr, "LeClientWriteDescriptor"},
- {94, nullptr, "LeClientRegisterNotification"},
- {95, nullptr, "LeClientDeregisterNotification"},
+ {42, nullptr, "EnableTxPowerBoostSetting"},
+ {43, nullptr, "IsTxPowerBoostSettingEnabled"},
+ {44, nullptr, "EnableAfhSetting"},
+ {45, nullptr, "IsAfhSettingEnabled"},
+ {46, nullptr, "InitializeBle"},
+ {47, nullptr, "EnableBle"},
+ {48, nullptr, "DisableBle"},
+ {49, nullptr, "FinalizeBle"},
+ {50, nullptr, "SetBleVisibility"},
+ {51, nullptr, "SetBleConnectionParameter"},
+ {52, nullptr, "SetBleDefaultConnectionParameter"},
+ {53, nullptr, "SetBleAdvertiseData"},
+ {54, nullptr, "SetBleAdvertiseParameter"},
+ {55, nullptr, "StartBleScan"},
+ {56, nullptr, "StopBleScan"},
+ {57, nullptr, "AddBleScanFilterCondition"},
+ {58, nullptr, "DeleteBleScanFilterCondition"},
+ {59, nullptr, "DeleteBleScanFilter"},
+ {60, nullptr, "ClearBleScanFilters"},
+ {61, nullptr, "EnableBleScanFilter"},
+ {62, nullptr, "RegisterGattClient"},
+ {63, nullptr, "UnregisterGattClient"},
+ {64, nullptr, "UnregisterAllGattClients"},
+ {65, nullptr, "ConnectGattServer"},
+ {66, nullptr, "CancelConnectGattServer"},
+ {67, nullptr, "DisconnectGattServer"},
+ {68, nullptr, "GetGattAttribute"},
+ {69, nullptr, "GetGattService"},
+ {70, nullptr, "ConfigureAttMtu"},
+ {71, nullptr, "RegisterGattServer"},
+ {72, nullptr, "UnregisterGattServer"},
+ {73, nullptr, "ConnectGattClient"},
+ {74, nullptr, "DisconnectGattClient"},
+ {75, nullptr, "AddGattService"},
+ {76, nullptr, "EnableGattService"},
+ {77, nullptr, "AddGattCharacteristic"},
+ {78, nullptr, "AddGattDescriptor"},
+ {79, nullptr, "GetBleManagedEventInfo"},
+ {80, nullptr, "GetGattFirstCharacteristic"},
+ {81, nullptr, "GetGattNextCharacteristic"},
+ {82, nullptr, "GetGattFirstDescriptor"},
+ {83, nullptr, "GetGattNextDescriptor"},
+ {84, nullptr, "RegisterGattManagedDataPath"},
+ {85, nullptr, "UnregisterGattManagedDataPath"},
+ {86, nullptr, "RegisterGattHidDataPath"},
+ {87, nullptr, "UnregisterGattHidDataPath"},
+ {88, nullptr, "RegisterGattDataPath"},
+ {89, nullptr, "UnregisterGattDataPath"},
+ {90, nullptr, "ReadGattCharacteristic"},
+ {91, nullptr, "ReadGattDescriptor"},
+ {92, nullptr, "WriteGattCharacteristic"},
+ {93, nullptr, "WriteGattDescriptor"},
+ {94, nullptr, "RegisterGattNotification"},
+ {95, nullptr, "UnregisterGattNotification"},
{96, nullptr, "GetLeHidEventInfo"},
{97, nullptr, "RegisterBleHidEvent"},
- {98, nullptr, "SetLeScanParameter"},
- {256, nullptr, "GetIsManufacturingMode"},
+ {98, nullptr, "SetBleScanParameter"},
+ {99, nullptr, "MoveToSecondaryPiconet"},
+ {256, nullptr, "IsManufacturingMode"},
{257, nullptr, "EmulateBluetoothCrash"},
{258, nullptr, "GetBleChannelMap"},
};
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index 251b3c9df..0d251c6d0 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -132,66 +132,71 @@ public:
explicit BTM() : ServiceFramework{"btm"} {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "Unknown1"},
- {1, nullptr, "Unknown2"},
- {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"},
- {3, nullptr, "Unknown3"},
- {4, nullptr, "Unknown4"},
- {5, nullptr, "Unknown5"},
- {6, nullptr, "Unknown6"},
- {7, nullptr, "Unknown7"},
- {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"},
- {9, nullptr, "Unknown8"},
- {10, nullptr, "Unknown9"},
- {11, nullptr, "Unknown10"},
- {12, nullptr, "Unknown11"},
- {13, nullptr, "Unknown12"},
+ {0, nullptr, "GetState"},
+ {1, nullptr, "GetHostDeviceProperty"},
+ {2, nullptr, "AcquireDeviceConditionEvent"},
+ {3, nullptr, "GetDeviceCondition"},
+ {4, nullptr, "SetBurstMode"},
+ {5, nullptr, "SetSlotMode"},
+ {6, nullptr, "SetBluetoothMode"},
+ {7, nullptr, "SetWlanMode"},
+ {8, nullptr, "AcquireDeviceInfoEvent"},
+ {9, nullptr, "GetDeviceInfo"},
+ {10, nullptr, "AddDeviceInfo"},
+ {11, nullptr, "RemoveDeviceInfo"},
+ {12, nullptr, "IncreaseDeviceInfoOrder"},
+ {13, nullptr, "LlrNotify"},
{14, nullptr, "EnableRadio"},
{15, nullptr, "DisableRadio"},
- {16, nullptr, "Unknown13"},
- {17, nullptr, "Unknown14"},
- {18, nullptr, "Unknown15"},
- {19, nullptr, "Unknown16"},
- {20, nullptr, "Unknown17"},
- {21, nullptr, "Unknown18"},
- {22, nullptr, "Unknown19"},
- {23, nullptr, "Unknown20"},
- {24, nullptr, "Unknown21"},
- {25, nullptr, "Unknown22"},
- {26, nullptr, "Unknown23"},
- {27, nullptr, "Unknown24"},
- {28, nullptr, "Unknown25"},
- {29, nullptr, "Unknown26"},
- {30, nullptr, "Unknown27"},
- {31, nullptr, "Unknown28"},
- {32, nullptr, "Unknown29"},
- {33, nullptr, "Unknown30"},
- {34, nullptr, "Unknown31"},
- {35, nullptr, "Unknown32"},
- {36, nullptr, "Unknown33"},
- {37, nullptr, "Unknown34"},
- {38, nullptr, "Unknown35"},
- {39, nullptr, "Unknown36"},
- {40, nullptr, "Unknown37"},
- {41, nullptr, "Unknown38"},
- {42, nullptr, "Unknown39"},
- {43, nullptr, "Unknown40"},
- {44, nullptr, "Unknown41"},
- {45, nullptr, "Unknown42"},
- {46, nullptr, "Unknown43"},
- {47, nullptr, "Unknown44"},
- {48, nullptr, "Unknown45"},
- {49, nullptr, "Unknown46"},
- {50, nullptr, "Unknown47"},
- {51, nullptr, "Unknown48"},
- {52, nullptr, "Unknown49"},
- {53, nullptr, "Unknown50"},
- {54, nullptr, "Unknown51"},
- {55, nullptr, "Unknown52"},
- {56, nullptr, "Unknown53"},
- {57, nullptr, "Unknown54"},
- {58, nullptr, "Unknown55"},
- {59, nullptr, "Unknown56"},
+ {16, nullptr, "HidDisconnect"},
+ {17, nullptr, "HidSetRetransmissionMode"},
+ {18, nullptr, "AcquireAwakeReqEvent"},
+ {19, nullptr, "AcquireLlrStateEvent"},
+ {20, nullptr, "IsLlrStarted"},
+ {21, nullptr, "EnableSlotSaving"},
+ {22, nullptr, "ProtectDeviceInfo"},
+ {23, nullptr, "AcquireBleScanEvent"},
+ {24, nullptr, "GetBleScanParameterGeneral"},
+ {25, nullptr, "GetBleScanParameterSmartDevice"},
+ {26, nullptr, "StartBleScanForGeneral"},
+ {27, nullptr, "StopBleScanForGeneral"},
+ {28, nullptr, "GetBleScanResultsForGeneral"},
+ {29, nullptr, "StartBleScanForPairedDevice"},
+ {30, nullptr, "StopBleScanForPairedDevice"},
+ {31, nullptr, "StartBleScanForSmartDevice"},
+ {32, nullptr, "StopBleScanForSmartDevice"},
+ {33, nullptr, "GetBleScanResultsForSmartDevice"},
+ {34, nullptr, "AcquireBleConnectionEvent"},
+ {35, nullptr, "BleConnect"},
+ {36, nullptr, "BleOverrideConnection"},
+ {37, nullptr, "BleDisconnect"},
+ {38, nullptr, "BleGetConnectionState"},
+ {39, nullptr, "BleGetGattClientConditionList"},
+ {40, nullptr, "AcquireBlePairingEvent"},
+ {41, nullptr, "BlePairDevice"},
+ {42, nullptr, "BleUnpairDeviceOnBoth"},
+ {43, nullptr, "BleUnpairDevice"},
+ {44, nullptr, "BleGetPairedAddresses"},
+ {45, nullptr, "AcquireBleServiceDiscoveryEvent"},
+ {46, nullptr, "GetGattServices"},
+ {47, nullptr, "GetGattService"},
+ {48, nullptr, "GetGattIncludedServices"},
+ {49, nullptr, "GetBelongingService"},
+ {50, nullptr, "GetGattCharacteristics"},
+ {51, nullptr, "GetGattDescriptors"},
+ {52, nullptr, "AcquireBleMtuConfigEvent"},
+ {53, nullptr, "ConfigureBleMtu"},
+ {54, nullptr, "GetBleMtu"},
+ {55, nullptr, "RegisterBleGattDataPath"},
+ {56, nullptr, "UnregisterBleGattDataPath"},
+ {57, nullptr, "RegisterAppletResourceUserId"},
+ {58, nullptr, "UnregisterAppletResourceUserId"},
+ {59, nullptr, "SetAppletResourceUserId"},
+ {60, nullptr, "Unknown60"},
+ {61, nullptr, "Unknown61"},
+ {62, nullptr, "Unknown62"},
+ {63, nullptr, "Unknown63"},
+ {64, nullptr, "Unknown64"},
};
// clang-format on
@@ -204,19 +209,19 @@ public:
explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
// clang-format off
static const FunctionInfo functions[] = {
- {0, nullptr, "RegisterSystemEventForDiscovery"},
- {1, nullptr, "Unknown1"},
- {2, nullptr, "Unknown2"},
- {3, nullptr, "Unknown3"},
- {4, nullptr, "Unknown4"},
- {5, nullptr, "Unknown5"},
- {6, nullptr, "Unknown6"},
- {7, nullptr, "Unknown7"},
- {8, nullptr, "Unknown8"},
- {9, nullptr, "Unknown9"},
- {10, nullptr, "Unknown10"},
- {11, nullptr, "Unknown11"},
- {12, nullptr, "Unknown11"},
+ {0, nullptr, "AcquireDiscoveryEvent"},
+ {1, nullptr, "StartDiscovery"},
+ {2, nullptr, "CancelDiscovery"},
+ {3, nullptr, "GetDeviceProperty"},
+ {4, nullptr, "CreateBond"},
+ {5, nullptr, "CancelBond"},
+ {6, nullptr, "SetTsiMode"},
+ {7, nullptr, "GeneralTest"},
+ {8, nullptr, "HidConnect"},
+ {9, nullptr, "GeneralGet"},
+ {10, nullptr, "GetGattClientDisconnectionReason"},
+ {11, nullptr, "GetBleConnectionParameter"},
+ {12, nullptr, "GetBleConnectionParameterRequest"},
};
// clang-format on
diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp
index 26c8a7081..ba5749b84 100644
--- a/src/core/hle/service/caps/caps.cpp
+++ b/src/core/hle/service/caps/caps.cpp
@@ -1,4 +1,4 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h
index fc70a4c27..b8c67b6e2 100644
--- a/src/core/hle/service/caps/caps.h
+++ b/src/core/hle/service/caps/caps.h
@@ -1,4 +1,4 @@
-// Copyright 2018 yuzu emulator team
+// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -12,73 +12,79 @@ class ServiceManager;
namespace Service::Capture {
-enum AlbumImageOrientation {
+enum class AlbumImageOrientation {
Orientation0 = 0,
Orientation1 = 1,
Orientation2 = 2,
Orientation3 = 3,
};
-enum AlbumReportOption {
+enum class AlbumReportOption {
Disable = 0,
Enable = 1,
};
-enum ContentType : u8 {
+enum class ContentType : u8 {
Screenshot = 0,
Movie = 1,
ExtraMovie = 3,
};
-enum AlbumStorage : u8 {
+enum class AlbumStorage : u8 {
NAND = 0,
SD = 1,
};
struct AlbumFileDateTime {
- u16 year;
- u8 month;
- u8 day;
- u8 hour;
- u8 minute;
- u8 second;
- u8 uid;
+ s16 year{};
+ s8 month{};
+ s8 day{};
+ s8 hour{};
+ s8 minute{};
+ s8 second{};
+ s8 uid{};
};
+static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size.");
struct AlbumEntry {
- u64 size;
- u64 application_id;
- AlbumFileDateTime datetime;
- AlbumStorage storage;
- ContentType content;
- u8 padding[6];
+ u64 size{};
+ u64 application_id{};
+ AlbumFileDateTime datetime{};
+ AlbumStorage storage{};
+ ContentType content{};
+ INSERT_PADDING_BYTES(6);
};
+static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size.");
struct AlbumFileEntry {
- u64 size;
- u64 hash;
- AlbumFileDateTime datetime;
- AlbumStorage storage;
- ContentType content;
- u8 padding[5];
- u8 unknown;
+ u64 size{}; // Size of the entry
+ u64 hash{}; // AES256 with hardcoded key over AlbumEntry
+ AlbumFileDateTime datetime{};
+ AlbumStorage storage{};
+ ContentType content{};
+ INSERT_PADDING_BYTES(5);
+ u8 unknown{1}; // Set to 1 on official SW
};
+static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size.");
struct ApplicationAlbumEntry {
- u64 size;
- u64 hash;
- AlbumFileDateTime datetime;
- AlbumStorage storage;
- ContentType content;
- u8 padding[5];
- u8 unknown;
+ u64 size{}; // Size of the entry
+ u64 hash{}; // AES256 with hardcoded key over AlbumEntry
+ AlbumFileDateTime datetime{};
+ AlbumStorage storage{};
+ ContentType content{};
+ INSERT_PADDING_BYTES(5);
+ u8 unknown{1}; // Set to 1 on official SW
};
+static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size.");
struct ApplicationAlbumFileEntry {
- ApplicationAlbumEntry entry;
- AlbumFileDateTime datetime;
- u64 unknown;
+ ApplicationAlbumEntry entry{};
+ AlbumFileDateTime datetime{};
+ u64 unknown{};
};
+static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30,
+ "ApplicationAlbumFileEntry has incorrect size.");
/// Registers all Capture services with the specified service manager.
void InstallInterfaces(SM::ServiceManager& sm);
diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp
index 88a3fdc05..a0a3b2ae3 100644
--- a/src/core/hle/service/caps/caps_a.cpp
+++ b/src/core/hle/service/caps/caps_a.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h
index 8de832491..cb93aad5b 100644
--- a/src/core/hle/service/caps/caps_a.h
+++ b/src/core/hle/service/caps/caps_a.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp
index ea6452ffa..ab17a187e 100644
--- a/src/core/hle/service/caps/caps_c.cpp
+++ b/src/core/hle/service/caps/caps_c.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h
index d07cdb441..a9d028689 100644
--- a/src/core/hle/service/caps/caps_c.h
+++ b/src/core/hle/service/caps/caps_c.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp
index d01a8a58e..822ee96c8 100644
--- a/src/core/hle/service/caps/caps_sc.cpp
+++ b/src/core/hle/service/caps/caps_sc.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h
index 9ba372f7a..ac3e929ca 100644
--- a/src/core/hle/service/caps/caps_sc.h
+++ b/src/core/hle/service/caps/caps_sc.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp
index eaa3a7494..24dc716e7 100644
--- a/src/core/hle/service/caps/caps_ss.cpp
+++ b/src/core/hle/service/caps/caps_ss.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h
index e258a6925..450686e4f 100644
--- a/src/core/hle/service/caps/caps_ss.h
+++ b/src/core/hle/service/caps/caps_ss.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp
index e8b0698e8..fffb2ecf9 100644
--- a/src/core/hle/service/caps/caps_su.cpp
+++ b/src/core/hle/service/caps/caps_su.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h
index c494d7c84..62c9603a9 100644
--- a/src/core/hle/service/caps/caps_su.h
+++ b/src/core/hle/service/caps/caps_su.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp
index 78bab6ed8..f36d8de2d 100644
--- a/src/core/hle/service/caps/caps_u.cpp
+++ b/src/core/hle/service/caps/caps_u.cpp
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -58,19 +58,25 @@ void CAPS_U::GetAlbumContentsFileListForApplication(Kernel::HLERequestContext& c
// u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total
// output entries (which is copied to a s32 by official SW).
IPC::RequestParser rp{ctx};
- [[maybe_unused]] const auto application_album_file_entries = rp.PopRaw<std::array<u8, 0x30>>();
- const auto pid = rp.Pop<s32>();
- const auto content_type = rp.PopRaw<ContentType>();
- [[maybe_unused]] const auto start_datetime = rp.PopRaw<AlbumFileDateTime>();
- [[maybe_unused]] const auto end_datetime = rp.PopRaw<AlbumFileDateTime>();
- const auto applet_resource_user_id = rp.Pop<u64>();
+ const auto pid{rp.Pop<s32>()};
+ const auto content_type{rp.PopEnum<ContentType>()};
+ const auto start_posix_time{rp.Pop<s64>()};
+ const auto end_posix_time{rp.Pop<s64>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ // TODO: Update this when we implement the album.
+ // Currently we do not have a method of accessing album entries, set this to 0 for now.
+ constexpr s32 total_entries{0};
+
LOG_WARNING(Service_Capture,
- "(STUBBED) called. pid={}, content_type={}, applet_resource_user_id={}", pid,
- content_type, applet_resource_user_id);
+ "(STUBBED) called. pid={}, content_type={}, start_posix_time={}, "
+ "end_posix_time={}, applet_resource_user_id={}, total_entries={}",
+ pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id,
+ total_entries);
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
- rb.Push<s32>(0);
+ rb.Push(total_entries);
}
} // namespace Service::Capture
diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h
index e6e0716ff..689364de4 100644
--- a/src/core/hle/service/caps/caps_u.h
+++ b/src/core/hle/service/caps/caps_u.h
@@ -1,4 +1,4 @@
-// Copyright 2020 yuzu emulator team
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index ad6841a64..da6b74a22 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -27,8 +27,8 @@ public:
{8, &ETicket::GetTitleKey, "GetTitleKey"},
{9, &ETicket::CountCommonTicket, "CountCommonTicket"},
{10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"},
- {11, &ETicket::ListCommonTicket, "ListCommonTicket"},
- {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"},
+ {11, &ETicket::ListCommonTicketRightsIds, "ListCommonTicketRightsIds"},
+ {12, &ETicket::ListPersonalizedTicketRightsIds, "ListPersonalizedTicketRightsIds"},
{13, nullptr, "ListMissingPersonalizedTicket"},
{14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"},
{15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"},
@@ -55,7 +55,46 @@ public:
{36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"},
{37, nullptr, "OwnTicket2"},
{38, nullptr, "OwnTicket3"},
+ {501, nullptr, "Unknown501"},
+ {502, nullptr, "Unknown502"},
{503, nullptr, "GetTitleKey"},
+ {504, nullptr, "Unknown504"},
+ {508, nullptr, "Unknown508"},
+ {509, nullptr, "Unknown509"},
+ {510, nullptr, "Unknown510"},
+ {511, nullptr, "Unknown511"},
+ {1001, nullptr, "Unknown1001"},
+ {1002, nullptr, "Unknown1002"},
+ {1003, nullptr, "Unknown1003"},
+ {1004, nullptr, "Unknown1004"},
+ {1005, nullptr, "Unknown1005"},
+ {1006, nullptr, "Unknown1006"},
+ {1007, nullptr, "Unknown1007"},
+ {1009, nullptr, "Unknown1009"},
+ {1010, nullptr, "Unknown1010"},
+ {1011, nullptr, "Unknown1011"},
+ {1012, nullptr, "Unknown1012"},
+ {1013, nullptr, "Unknown1013"},
+ {1014, nullptr, "Unknown1014"},
+ {1015, nullptr, "Unknown1015"},
+ {1016, nullptr, "Unknown1016"},
+ {1017, nullptr, "Unknown1017"},
+ {1018, nullptr, "Unknown1018"},
+ {1019, nullptr, "Unknown1019"},
+ {1020, nullptr, "Unknown1020"},
+ {1021, nullptr, "Unknown1021"},
+ {1501, nullptr, "Unknown1501"},
+ {1502, nullptr, "Unknown1502"},
+ {1503, nullptr, "Unknown1503"},
+ {1504, nullptr, "Unknown1504"},
+ {1505, nullptr, "Unknown1505"},
+ {2000, nullptr, "Unknown2000"},
+ {2001, nullptr, "Unknown2001"},
+ {2100, nullptr, "Unknown2100"},
+ {2501, nullptr, "Unknown2501"},
+ {2502, nullptr, "Unknown2502"},
+ {3001, nullptr, "Unknown3001"},
+ {3002, nullptr, "Unknown3002"},
};
// clang-format on
RegisterHandlers(functions);
@@ -147,7 +186,7 @@ private:
rb.Push<u32>(count);
}
- void ListCommonTicket(Kernel::HLERequestContext& ctx) {
+ void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) {
u32 out_entries;
if (keys.GetCommonTickets().empty())
out_entries = 0;
@@ -170,7 +209,7 @@ private:
rb.Push<u32>(out_entries);
}
- void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) {
+ void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) {
u32 out_entries;
if (keys.GetPersonalizedTickets().empty())
out_entries = 0;
diff --git a/src/core/hle/service/eupld/eupld.cpp b/src/core/hle/service/eupld/eupld.cpp
index 2df30acee..0d6d244f4 100644
--- a/src/core/hle/service/eupld/eupld.cpp
+++ b/src/core/hle/service/eupld/eupld.cpp
@@ -19,6 +19,7 @@ public:
{1, nullptr, "ImportCrt"},
{2, nullptr, "ImportPki"},
{3, nullptr, "SetAutoUpload"},
+ {4, nullptr, "GetAutoUpload"},
};
// clang-format on
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index f6503fe2f..20c331b77 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -767,7 +767,7 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter)
{1014, nullptr, "OutputMultiProgramTagAccessLog"},
{1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
{1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
- {1200, nullptr, "OpenMultiCommitManager"},
+ {1200, &FSP_SRV::OpenMultiCommitManager, "OpenMultiCommitManager"},
{1300, nullptr, "OpenBisWiper"},
};
// clang-format on
@@ -988,4 +988,40 @@ void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) {
rb.Push(access_log_program_index);
}
+/// Stubbed IMultiCommitManager interface; both commands only log a warning and report success.
+class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> {
+public:
+ explicit IMultiCommitManager() : ServiceFramework("IMultiCommitManager") {
+ static const FunctionInfo functions[] = {
+ {1, &IMultiCommitManager::Add, "Add"},
+ {2, &IMultiCommitManager::Commit, "Commit"},
+ };
+ RegisterHandlers(functions);
+ }
+
+private:
+ // No state is kept between Add and Commit while both handlers are stubs.
+ void Add(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_FS, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
+ void Commit(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_FS, "(STUBBED) called");
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+};
+
+// Command 1200: pushes a newly created IMultiCommitManager into the response.
+void FSP_SRV::OpenMultiCommitManager(Kernel::HLERequestContext& ctx) {
+ LOG_DEBUG(Service_FS, "called");
+ IPC::ResponseBuilder response{ctx, 2, 0, 1};
+ response.Push(RESULT_SUCCESS);
+ response.PushIpcInterface<IMultiCommitManager>(std::make_shared<IMultiCommitManager>());
+}
+
} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index d52b55999..dfb3e395b 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -50,6 +50,7 @@ private:
void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx);
+ void OpenMultiCommitManager(Kernel::HLERequestContext& ctx);
FileSystemController& fsc;
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 68f259b70..b7adaffc7 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -25,9 +25,13 @@ public:
{10101, &IFriendService::GetFriendList, "GetFriendList"},
{10102, nullptr, "UpdateFriendInfo"},
{10110, nullptr, "GetFriendProfileImage"},
+ {10120, nullptr, "Unknown10120"},
+ {10121, nullptr, "Unknown10121"},
{10200, nullptr, "SendFriendRequestForApplication"},
{10211, nullptr, "AddFacedFriendRequestForApplication"},
{10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
+ {10420, nullptr, "Unknown10420"},
+ {10421, nullptr, "Unknown10421"},
{10500, nullptr, "GetProfileList"},
{10600, nullptr, "DeclareOpenOnlinePlaySession"},
{10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -97,6 +101,8 @@ public:
{30900, nullptr, "SendFriendInvitation"},
{30910, nullptr, "ReadFriendInvitation"},
{30911, nullptr, "ReadAllFriendInvitations"},
+ {40100, nullptr, "Unknown40100"},
+ {40400, nullptr, "Unknown40400"},
{49900, nullptr, "DeleteNetworkServiceAccountCache"},
};
// clang-format on
diff --git a/src/core/hle/service/grc/grc.cpp b/src/core/hle/service/grc/grc.cpp
index 24910ac6c..401e0b208 100644
--- a/src/core/hle/service/grc/grc.cpp
+++ b/src/core/hle/service/grc/grc.cpp
@@ -17,6 +17,9 @@ public:
static const FunctionInfo functions[] = {
{1, nullptr, "OpenContinuousRecorder"},
{2, nullptr, "OpenGameMovieTrimmer"},
+ {3, nullptr, "OpenOffscreenRecorder"},
+ {101, nullptr, "CreateMovieMaker"},
+ {9903, nullptr, "SetOffscreenRecordingMarker"},
};
// clang-format on
diff --git a/src/core/hle/service/hid/controllers/debug_pad.cpp b/src/core/hle/service/hid/controllers/debug_pad.cpp
index 1f2131ec8..cb35919e9 100644
--- a/src/core/hle/service/hid/controllers/debug_pad.cpp
+++ b/src/core/hle/service/hid/controllers/debug_pad.cpp
@@ -23,7 +23,7 @@ void Controller_DebugPad::OnRelease() {}
void Controller_DebugPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
- shared_memory.header.timestamp = core_timing.GetTicks();
+ shared_memory.header.timestamp = core_timing.GetCPUTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/gesture.cpp b/src/core/hle/service/hid/controllers/gesture.cpp
index 6e990dd00..b7b7bfeae 100644
--- a/src/core/hle/service/hid/controllers/gesture.cpp
+++ b/src/core/hle/service/hid/controllers/gesture.cpp
@@ -19,7 +19,7 @@ void Controller_Gesture::OnRelease() {}
void Controller_Gesture::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
- shared_memory.header.timestamp = core_timing.GetTicks();
+ shared_memory.header.timestamp = core_timing.GetCPUTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 358cb9329..feae89525 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -21,7 +21,7 @@ void Controller_Keyboard::OnRelease() {}
void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
- shared_memory.header.timestamp = core_timing.GetTicks();
+ shared_memory.header.timestamp = core_timing.GetCPUTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
@@ -38,10 +38,11 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing,
cur_entry.sampling_number = last_entry.sampling_number + 1;
cur_entry.sampling_number2 = cur_entry.sampling_number;
+ cur_entry.key.fill(0);
+ cur_entry.modifier = 0;
+
for (std::size_t i = 0; i < keyboard_keys.size(); ++i) {
- for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) {
- cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k);
- }
+ cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE));
}
for (std::size_t i = 0; i < keyboard_mods.size(); ++i) {
diff --git a/src/core/hle/service/hid/controllers/mouse.cpp b/src/core/hle/service/hid/controllers/mouse.cpp
index 93d88ea50..ac40989c5 100644
--- a/src/core/hle/service/hid/controllers/mouse.cpp
+++ b/src/core/hle/service/hid/controllers/mouse.cpp
@@ -19,7 +19,7 @@ void Controller_Mouse::OnRelease() {}
void Controller_Mouse::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
- shared_memory.header.timestamp = core_timing.GetTicks();
+ shared_memory.header.timestamp = core_timing.GetCPUTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index c55d900e2..ef67ad690 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -328,7 +328,7 @@ void Controller_NPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8*
const auto& last_entry =
main_controller->npad[main_controller->common.last_entry_index];
- main_controller->common.timestamp = core_timing.GetTicks();
+ main_controller->common.timestamp = core_timing.GetCPUTicks();
main_controller->common.last_entry_index =
(main_controller->common.last_entry_index + 1) % 17;
@@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
}
+void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) {
+ gyroscope_zero_drift_mode = drift_mode;
+}
+
+Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const {
+ return gyroscope_zero_drift_mode;
+}
+
void Controller_NPad::StartLRAssignmentMode() {
// Nothing internally is used for lr assignment mode. Since we have the ability to set the
// controller types from boot, it doesn't really matter about showing a selection screen
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 931f03430..5d4c58a43 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -58,6 +58,12 @@ public:
};
static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");
+ enum class GyroscopeZeroDriftMode : u32 {
+ Loose = 0,
+ Standard = 1,
+ Tight = 2,
+ };
+
enum class NpadHoldType : u64 {
Vertical = 0,
Horizontal = 1,
@@ -117,6 +123,8 @@ public:
void ConnectNPad(u32 npad_id);
void DisconnectNPad(u32 npad_id);
+ void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode);
+ GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;
LedPattern GetLedPattern(u32 npad_id);
void SetVibrationEnabled(bool can_vibrate);
bool IsVibrationEnabled() const;
@@ -324,8 +332,8 @@ private:
std::array<Kernel::EventPair, 10> styleset_changed_events;
Vibration last_processed_vibration{};
std::array<ControllerHolder, 10> connected_controllers{};
+ GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};
bool can_controllers_vibrate{true};
-
std::array<ControllerPad, 10> npad_pad_states{};
bool is_in_lr_assignment_mode{false};
Core::System& system;
diff --git a/src/core/hle/service/hid/controllers/stubbed.cpp b/src/core/hle/service/hid/controllers/stubbed.cpp
index 9e527d176..e7483bfa2 100644
--- a/src/core/hle/service/hid/controllers/stubbed.cpp
+++ b/src/core/hle/service/hid/controllers/stubbed.cpp
@@ -23,7 +23,7 @@ void Controller_Stubbed::OnUpdate(const Core::Timing::CoreTiming& core_timing, u
}
CommonHeader header{};
- header.timestamp = core_timing.GetTicks();
+ header.timestamp = core_timing.GetCPUTicks();
header.total_entry_count = 17;
header.entry_count = 0;
header.last_entry_index = 0;
diff --git a/src/core/hle/service/hid/controllers/touchscreen.cpp b/src/core/hle/service/hid/controllers/touchscreen.cpp
index 1c6e55566..e326f8f5c 100644
--- a/src/core/hle/service/hid/controllers/touchscreen.cpp
+++ b/src/core/hle/service/hid/controllers/touchscreen.cpp
@@ -22,7 +22,7 @@ void Controller_Touchscreen::OnRelease() {}
void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
- shared_memory.header.timestamp = core_timing.GetTicks();
+ shared_memory.header.timestamp = core_timing.GetCPUTicks();
shared_memory.header.total_entry_count = 17;
if (!IsControllerActivated()) {
@@ -49,7 +49,7 @@ void Controller_Touchscreen::OnUpdate(const Core::Timing::CoreTiming& core_timin
touch_entry.diameter_x = Settings::values.touchscreen.diameter_x;
touch_entry.diameter_y = Settings::values.touchscreen.diameter_y;
touch_entry.rotation_angle = Settings::values.touchscreen.rotation_angle;
- const u64 tick = core_timing.GetTicks();
+ const u64 tick = core_timing.GetCPUTicks();
touch_entry.delta_time = tick - last_touch;
last_touch = tick;
touch_entry.finger = Settings::values.touchscreen.finger;
diff --git a/src/core/hle/service/hid/controllers/xpad.cpp b/src/core/hle/service/hid/controllers/xpad.cpp
index 27511b27b..2503ef241 100644
--- a/src/core/hle/service/hid/controllers/xpad.cpp
+++ b/src/core/hle/service/hid/controllers/xpad.cpp
@@ -20,7 +20,7 @@ void Controller_XPad::OnRelease() {}
void Controller_XPad::OnUpdate(const Core::Timing::CoreTiming& core_timing, u8* data,
std::size_t size) {
for (auto& xpad_entry : shared_memory.shared_memory_entries) {
- xpad_entry.header.timestamp = core_timing.GetTicks();
+ xpad_entry.header.timestamp = core_timing.GetCPUTicks();
xpad_entry.header.total_entry_count = 17;
if (!IsControllerActivated()) {
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 5559587e3..e9020e0dc 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -39,11 +39,9 @@ namespace Service::HID {
// Updating period for each HID device.
// TODO(ogniK): Find actual polling rate of hid
-constexpr s64 pad_update_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 66);
-[[maybe_unused]] constexpr s64 accelerometer_update_ticks =
- static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
-[[maybe_unused]] constexpr s64 gyroscope_update_ticks =
- static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 100);
+constexpr s64 pad_update_ticks = static_cast<s64>(1000000000 / 66);
+[[maybe_unused]] constexpr s64 accelerometer_update_ticks = static_cast<s64>(1000000000 / 100);
+[[maybe_unused]] constexpr s64 gyroscope_update_ticks = static_cast<s64>(1000000000 / 100);
constexpr std::size_t SHARED_MEMORY_SIZE = 0x40000;
IAppletResource::IAppletResource(Core::System& system)
@@ -78,8 +76,8 @@ IAppletResource::IAppletResource(Core::System& system)
// Register update callbacks
pad_update_event =
- Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 cycles_late) {
- UpdateControllers(userdata, cycles_late);
+ Core::Timing::CreateEvent("HID::UpdatePadCallback", [this](u64 userdata, s64 ns_late) {
+ UpdateControllers(userdata, ns_late);
});
// TODO(shinyquagsire23): Other update callbacks? (accel, gyro?)
@@ -109,7 +107,7 @@ void IAppletResource::GetSharedMemoryHandle(Kernel::HLERequestContext& ctx) {
rb.PushCopyObjects(shared_mem);
}
-void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
+void IAppletResource::UpdateControllers(u64 userdata, s64 ns_late) {
auto& core_timing = system.CoreTiming();
const bool should_reload = Settings::values.is_device_reload_pending.exchange(false);
@@ -120,7 +118,7 @@ void IAppletResource::UpdateControllers(u64 userdata, s64 cycles_late) {
controller->OnUpdate(core_timing, shared_mem->GetPointer(), SHARED_MEMORY_SIZE);
}
- core_timing.ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
+ core_timing.ScheduleEvent(pad_update_ticks - ns_late, pad_update_event);
}
class IActiveVibrationDeviceList final : public ServiceFramework<IActiveVibrationDeviceList> {
@@ -157,11 +155,11 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
{11, &Hid::ActivateTouchScreen, "ActivateTouchScreen"},
{21, &Hid::ActivateMouse, "ActivateMouse"},
{31, &Hid::ActivateKeyboard, "ActivateKeyboard"},
- {32, nullptr, "SendKeyboardLockKeyEvent"},
+ {32, &Hid::SendKeyboardLockKeyEvent, "SendKeyboardLockKeyEvent"},
{40, nullptr, "AcquireXpadIdEventHandle"},
{41, nullptr, "ReleaseXpadIdEventHandle"},
{51, &Hid::ActivateXpad, "ActivateXpad"},
- {55, nullptr, "GetXpadIds"},
+ {55, &Hid::GetXpadIDs, "GetXpadIds"},
{56, nullptr, "ActivateJoyXpad"},
{58, nullptr, "GetJoyXpadLifoHandle"},
{59, nullptr, "GetJoyXpadIds"},
@@ -185,8 +183,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
{77, nullptr, "GetAccelerometerPlayMode"},
{78, nullptr, "ResetAccelerometerPlayMode"},
{79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
- {80, nullptr, "GetGyroscopeZeroDriftMode"},
- {81, nullptr, "ResetGyroscopeZeroDriftMode"},
+ {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"},
+ {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},
{82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
{83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
{91, &Hid::ActivateGesture, "ActivateGesture"},
@@ -230,15 +228,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
{211, nullptr, "IsVibrationDeviceMounted"},
{300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
{301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
- {302, nullptr, "StopConsoleSixAxisSensor"},
- {303, nullptr, "ActivateSevenSixAxisSensor"},
- {304, nullptr, "StartSevenSixAxisSensor"},
+ {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"},
+ {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"},
+ {304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"},
{305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
{306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
- {307, nullptr, "FinalizeSevenSixAxisSensor"},
+ {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"},
{308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
{309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
- {310, nullptr, "ResetSevenSixAxisSensorTimestamp"},
+ {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"},
{400, nullptr, "IsUsbFullKeyControllerEnabled"},
{401, nullptr, "EnableUsbFullKeyController"},
{402, nullptr, "IsUsbFullKeyControllerConnected"},
@@ -319,6 +317,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push(0);
+}
+
void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
@@ -363,6 +372,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto flags{rp.Pop<u32>()};
+ LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto unknown{rp.Pop<u32>()};
@@ -402,15 +420,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto handle{rp.Pop<u32>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
+ applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto handle{rp.Pop<u32>()};
const auto drift_mode{rp.Pop<u32>()};
const auto applet_resource_user_id{rp.Pop<u64>()};
- LOG_WARNING(Service_HID,
- "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
- drift_mode, applet_resource_user_id);
+ applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode});
+
+ LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
+ drift_mode, applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto handle{rp.Pop<u32>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
+ applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(
+ static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .GetGyroscopeZeroDriftMode()));
+}
+
+void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto handle{rp.Pop<u32>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard);
+
+ LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
+ applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -821,33 +883,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
-void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
+void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto handle{rp.Pop<u32>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
- LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle);
+ LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
+ applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
-void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
+void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
- const auto unknown{rp.Pop<u32>()};
- LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}",
- applet_resource_user_id, unknown);
+ LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
+ applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
-void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
+void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
- const auto unknown{rp.Pop<u32>()};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
- LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown);
+ LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
+ applet_resource_user_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
@@ -871,6 +935,51 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
+ applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+
+ LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
+ applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto applet_resource_user_id{rp.Pop<u64>()};
+ const auto is_palma_all_connectable{rp.Pop<bool>()};
+
+ LOG_WARNING(Service_HID,
+ "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}",
+ applet_resource_user_id, is_palma_all_connectable);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto palma_boost_mode{rp.Pop<bool>()};
+
+ LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
class HidDbg final : public ServiceFramework<HidDbg> {
public:
explicit HidDbg() : ServiceFramework{"hid:dbg"} {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 23552efb1..6fb048360 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -86,14 +86,19 @@ public:
private:
void CreateAppletResource(Kernel::HLERequestContext& ctx);
void ActivateXpad(Kernel::HLERequestContext& ctx);
+ void GetXpadIDs(Kernel::HLERequestContext& ctx);
void ActivateDebugPad(Kernel::HLERequestContext& ctx);
void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
void ActivateMouse(Kernel::HLERequestContext& ctx);
void ActivateKeyboard(Kernel::HLERequestContext& ctx);
+ void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);
void ActivateGesture(Kernel::HLERequestContext& ctx);
void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
+ void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
+ void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
+ void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
@@ -125,11 +130,15 @@ private:
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
- void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
- void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
- void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
+ void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
+ void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
+ void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
+ void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
+ void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx);
+ void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
+ void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
std::shared_ptr<IAppletResource> applet_resource;
Core::System& system;
diff --git a/src/core/hle/service/hid/irs.cpp b/src/core/hle/service/hid/irs.cpp
index 36ed6f7da..e82fd031b 100644
--- a/src/core/hle/service/hid/irs.cpp
+++ b/src/core/hle/service/hid/irs.cpp
@@ -98,7 +98,7 @@ void IRS::GetImageTransferProcessorState(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 5};
rb.Push(RESULT_SUCCESS);
- rb.PushRaw<u64>(system.CoreTiming().GetTicks());
+ rb.PushRaw<u64>(system.CoreTiming().GetCPUTicks());
rb.PushRaw<u32>(0);
}
diff --git a/src/core/hle/service/lbl/lbl.cpp b/src/core/hle/service/lbl/lbl.cpp
index e8f9f2d29..17350b403 100644
--- a/src/core/hle/service/lbl/lbl.cpp
+++ b/src/core/hle/service/lbl/lbl.cpp
@@ -47,6 +47,7 @@ public:
{26, &LBL::EnableVrMode, "EnableVrMode"},
{27, &LBL::DisableVrMode, "DisableVrMode"},
{28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"},
+ {29, nullptr, "IsAutoBrightnessControlSupported"},
};
// clang-format on
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index 92adde6d4..49972cd69 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -69,6 +69,7 @@ public:
{101, nullptr, "GetNetworkInfoLatestUpdate"},
{102, nullptr, "Scan"},
{103, nullptr, "ScanPrivate"},
+ {104, nullptr, "SetWirelessControllerRestriction"},
{200, nullptr, "OpenAccessPoint"},
{201, nullptr, "CloseAccessPoint"},
{202, nullptr, "CreateNetwork"},
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index 6ad3be1b3..64a526b9e 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -39,42 +39,61 @@ constexpr ResultCode ERROR_NOT_INITIALIZED{ErrorModule::Loader, 87};
constexpr std::size_t MAXIMUM_LOADED_RO{0x40};
constexpr std::size_t MAXIMUM_MAP_RETRIES{0x200};
+constexpr std::size_t TEXT_INDEX{0};
+constexpr std::size_t RO_INDEX{1};
+constexpr std::size_t DATA_INDEX{2};
+
+struct NRRCertification {
+ u64_le application_id_mask;
+ u64_le application_id_pattern;
+ INSERT_PADDING_BYTES(0x10);
+ std::array<u8, 0x100> public_key; // Also known as modulus
+ std::array<u8, 0x100> signature;
+};
+static_assert(sizeof(NRRCertification) == 0x220, "NRRCertification has invalid size.");
+
struct NRRHeader {
u32_le magic;
- INSERT_PADDING_BYTES(12);
- u64_le title_id_mask;
- u64_le title_id_pattern;
- INSERT_PADDING_BYTES(16);
- std::array<u8, 0x100> modulus;
- std::array<u8, 0x100> signature_1;
- std::array<u8, 0x100> signature_2;
- u64_le title_id;
+ u32_le certification_signature_key_generation; // 9.0.0+
+ INSERT_PADDING_WORDS(2);
+ NRRCertification certification;
+ std::array<u8, 0x100> signature;
+ u64_le application_id;
u32_le size;
- INSERT_PADDING_BYTES(4);
+ u8 nrr_kind; // 7.0.0+
+ INSERT_PADDING_BYTES(3);
u32_le hash_offset;
u32_le hash_count;
- INSERT_PADDING_BYTES(8);
+ INSERT_PADDING_WORDS(2);
+};
+static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has invalid size.");
+
+struct SegmentHeader {
+ u32_le memory_offset;
+ u32_le memory_size;
};
-static_assert(sizeof(NRRHeader) == 0x350, "NRRHeader has incorrect size.");
+static_assert(sizeof(SegmentHeader) == 0x8, "SegmentHeader has invalid size.");
struct NROHeader {
+ // Switchbrew calls this "Start" (0x10)
INSERT_PADDING_WORDS(1);
u32_le mod_offset;
INSERT_PADDING_WORDS(2);
+
+ // Switchbrew calls this "Header" (0x70)
u32_le magic;
u32_le version;
u32_le nro_size;
u32_le flags;
- u32_le text_offset;
- u32_le text_size;
- u32_le ro_offset;
- u32_le ro_size;
- u32_le rw_offset;
- u32_le rw_size;
+ // .text, .ro, .data
+ std::array<SegmentHeader, 3> segment_headers;
u32_le bss_size;
INSERT_PADDING_WORDS(1);
std::array<u8, 0x20> build_id;
- INSERT_PADDING_BYTES(0x20);
+ u32_le dso_handle_offset;
+ INSERT_PADDING_WORDS(1);
+ // .apiInfo, .dynstr, .dynsym
+ std::array<SegmentHeader, 3> segment_headers_2;
};
static_assert(sizeof(NROHeader) == 0x80, "NROHeader has invalid size.");
@@ -91,6 +110,7 @@ struct NROInfo {
std::size_t data_size{};
VAddr src_addr{};
};
+static_assert(sizeof(NROInfo) == 0x60, "NROInfo has invalid size.");
class DebugMonitor final : public ServiceFramework<DebugMonitor> {
public:
@@ -226,11 +246,11 @@ public:
return;
}
- if (system.CurrentProcess()->GetTitleID() != header.title_id) {
+ if (system.CurrentProcess()->GetTitleID() != header.application_id) {
LOG_ERROR(Service_LDR,
"Attempting to load NRR with title ID other than current process. (actual "
"{:016X})!",
- header.title_id);
+ header.application_id);
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(ERROR_INVALID_NRR);
return;
@@ -348,10 +368,10 @@ public:
ResultCode LoadNro(Kernel::Process* process, const NROHeader& nro_header, VAddr nro_addr,
VAddr start) const {
- const VAddr text_start{start + nro_header.text_offset};
- const VAddr ro_start{start + nro_header.ro_offset};
- const VAddr data_start{start + nro_header.rw_offset};
- const VAddr bss_start{data_start + nro_header.rw_size};
+ const VAddr text_start{start + nro_header.segment_headers[TEXT_INDEX].memory_offset};
+ const VAddr ro_start{start + nro_header.segment_headers[RO_INDEX].memory_offset};
+ const VAddr data_start{start + nro_header.segment_headers[DATA_INDEX].memory_offset};
+ const VAddr bss_start{data_start + nro_header.segment_headers[DATA_INDEX].memory_size};
const VAddr bss_end_addr{
Common::AlignUp(bss_start + nro_header.bss_size, Kernel::Memory::PageSize)};
@@ -360,9 +380,12 @@ public:
system.Memory().ReadBlock(src_addr, source_data.data(), source_data.size());
system.Memory().WriteBlock(dst_addr, source_data.data(), source_data.size());
}};
- CopyCode(nro_addr + nro_header.text_offset, text_start, nro_header.text_size);
- CopyCode(nro_addr + nro_header.ro_offset, ro_start, nro_header.ro_size);
- CopyCode(nro_addr + nro_header.rw_offset, data_start, nro_header.rw_size);
+ CopyCode(nro_addr + nro_header.segment_headers[TEXT_INDEX].memory_offset, text_start,
+ nro_header.segment_headers[TEXT_INDEX].memory_size);
+ CopyCode(nro_addr + nro_header.segment_headers[RO_INDEX].memory_offset, ro_start,
+ nro_header.segment_headers[RO_INDEX].memory_size);
+ CopyCode(nro_addr + nro_header.segment_headers[DATA_INDEX].memory_offset, data_start,
+ nro_header.segment_headers[DATA_INDEX].memory_size);
CASCADE_CODE(process->PageTable().SetCodeMemoryPermission(
text_start, ro_start - text_start, Kernel::Memory::MemoryPermission::ReadAndExecute));
@@ -484,9 +507,11 @@ public:
}
// Track the loaded NRO
- nro.insert_or_assign(*map_result, NROInfo{hash, *map_result, nro_size, bss_address,
- bss_size, header.text_size, header.ro_size,
- header.rw_size, nro_address});
+ nro.insert_or_assign(*map_result,
+ NROInfo{hash, *map_result, nro_size, bss_address, bss_size,
+ header.segment_headers[TEXT_INDEX].memory_size,
+ header.segment_headers[RO_INDEX].memory_size,
+ header.segment_headers[DATA_INDEX].memory_size, nro_address});
// Invalidate JIT caches for the newly mapped process code
system.InvalidateCpuInstructionCaches();
@@ -584,11 +609,21 @@ private:
static bool IsValidNRO(const NROHeader& header, u64 nro_size, u64 bss_size) {
return header.magic == Common::MakeMagic('N', 'R', 'O', '0') &&
header.nro_size == nro_size && header.bss_size == bss_size &&
- header.ro_offset == header.text_offset + header.text_size &&
- header.rw_offset == header.ro_offset + header.ro_size &&
- nro_size == header.rw_offset + header.rw_size &&
- Common::Is4KBAligned(header.text_size) && Common::Is4KBAligned(header.ro_size) &&
- Common::Is4KBAligned(header.rw_size);
+
+ header.segment_headers[RO_INDEX].memory_offset ==
+ header.segment_headers[TEXT_INDEX].memory_offset +
+ header.segment_headers[TEXT_INDEX].memory_size &&
+
+ header.segment_headers[DATA_INDEX].memory_offset ==
+ header.segment_headers[RO_INDEX].memory_offset +
+ header.segment_headers[RO_INDEX].memory_size &&
+
+ nro_size == header.segment_headers[DATA_INDEX].memory_offset +
+ header.segment_headers[DATA_INDEX].memory_size &&
+
+ Common::Is4KBAligned(header.segment_headers[TEXT_INDEX].memory_size) &&
+ Common::Is4KBAligned(header.segment_headers[RO_INDEX].memory_size) &&
+ Common::Is4KBAligned(header.segment_headers[DATA_INDEX].memory_size);
}
Core::System& system;
};
diff --git a/src/core/hle/service/lm/manager.cpp b/src/core/hle/service/lm/manager.cpp
index b67081b86..3ee2374e7 100644
--- a/src/core/hle/service/lm/manager.cpp
+++ b/src/core/hle/service/lm/manager.cpp
@@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) {
return Common::StringFromFixedZeroTerminatedBuffer(
reinterpret_cast<const char*>(data.data()), data.size());
default:
- UNIMPLEMENTED();
+ UNIMPLEMENTED_MSG("Unimplemented field type={}", type);
+ return "";
}
}
diff --git a/src/core/hle/service/mig/mig.cpp b/src/core/hle/service/mig/mig.cpp
index d16367f2c..113a4665c 100644
--- a/src/core/hle/service/mig/mig.cpp
+++ b/src/core/hle/service/mig/mig.cpp
@@ -20,6 +20,12 @@ public:
{101, nullptr, "ResumeServer"},
{200, nullptr, "CreateClient"},
{201, nullptr, "ResumeClient"},
+ {1001, nullptr, "Unknown1001"},
+ {1010, nullptr, "Unknown1010"},
+ {1100, nullptr, "Unknown1100"},
+ {1101, nullptr, "Unknown1101"},
+ {1200, nullptr, "Unknown1200"},
+ {1201, nullptr, "Unknown1201"}
};
// clang-format on
diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp
index def63dc8a..25c24e537 100644
--- a/src/core/hle/service/mm/mm_u.cpp
+++ b/src/core/hle/service/mm/mm_u.cpp
@@ -14,14 +14,14 @@ public:
explicit MM_U() : ServiceFramework{"mm:u"} {
// clang-format off
static const FunctionInfo functions[] = {
- {0, &MM_U::Initialize, "Initialize"},
- {1, &MM_U::Finalize, "Finalize"},
- {2, &MM_U::SetAndWait, "SetAndWait"},
- {3, &MM_U::Get, "Get"},
- {4, &MM_U::InitializeWithId, "InitializeWithId"},
- {5, &MM_U::FinalizeWithId, "FinalizeWithId"},
- {6, &MM_U::SetAndWaitWithId, "SetAndWaitWithId"},
- {7, &MM_U::GetWithId, "GetWithId"},
+ {0, &MM_U::InitializeOld, "InitializeOld"},
+ {1, &MM_U::FinalizeOld, "FinalizeOld"},
+ {2, &MM_U::SetAndWaitOld, "SetAndWaitOld"},
+ {3, &MM_U::GetOld, "GetOld"},
+ {4, &MM_U::Initialize, "Initialize"},
+ {5, &MM_U::Finalize, "Finalize"},
+ {6, &MM_U::SetAndWait, "SetAndWait"},
+ {7, &MM_U::Get, "Get"},
};
// clang-format on
@@ -29,21 +29,21 @@ public:
}
private:
- void Initialize(Kernel::HLERequestContext& ctx) {
+ void InitializeOld(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
- void Finalize(Kernel::HLERequestContext& ctx) {
+ void FinalizeOld(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
- void SetAndWait(Kernel::HLERequestContext& ctx) {
+ void SetAndWaitOld(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
min = rp.Pop<u32>();
max = rp.Pop<u32>();
@@ -54,7 +54,7 @@ private:
rb.Push(RESULT_SUCCESS);
}
- void Get(Kernel::HLERequestContext& ctx) {
+ void GetOld(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ private:
rb.Push(current);
}
- void InitializeWithId(Kernel::HLERequestContext& ctx) {
+ void Initialize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
@@ -70,14 +70,14 @@ private:
rb.Push<u32>(id); // Any non zero value
}
- void FinalizeWithId(Kernel::HLERequestContext& ctx) {
+ void Finalize(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
- void SetAndWaitWithId(Kernel::HLERequestContext& ctx) {
+ void SetAndWait(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
u32 input_id = rp.Pop<u32>();
min = rp.Pop<u32>();
@@ -90,7 +90,7 @@ private:
rb.Push(RESULT_SUCCESS);
}
- void GetWithId(Kernel::HLERequestContext& ctx) {
+ void Get(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_MM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index ec9aae04a..e38dea1f4 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -28,16 +28,16 @@ public:
{7, nullptr, "ResolveApplicationLegalInformationPath"},
{8, nullptr, "RedirectApplicationLegalInformationPath"},
{9, nullptr, "Refresh"},
- {10, nullptr, "RedirectProgramPath2"},
- {11, nullptr, "Refresh2"},
- {12, nullptr, "DeleteProgramPath"},
- {13, nullptr, "DeleteApplicationControlPath"},
- {14, nullptr, "DeleteApplicationHtmlDocumentPath"},
- {15, nullptr, "DeleteApplicationLegalInformationPath"},
- {16, nullptr, ""},
- {17, nullptr, ""},
- {18, nullptr, ""},
- {19, nullptr, ""},
+ {10, nullptr, "RedirectApplicationProgramPath"},
+ {11, nullptr, "ClearApplicationRedirection"},
+ {12, nullptr, "EraseProgramRedirection"},
+ {13, nullptr, "EraseApplicationControlRedirection"},
+ {14, nullptr, "EraseApplicationHtmlDocumentRedirection"},
+ {15, nullptr, "EraseApplicationLegalInformationRedirection"},
+ {16, nullptr, "ResolveProgramPathForDebug"},
+ {17, nullptr, "RedirectProgramPathForDebug"},
+ {18, nullptr, "RedirectApplicationProgramPathForDebug"},
+ {19, nullptr, "EraseProgramRedirectionForDebug"},
};
// clang-format on
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index b7b34ce7e..780ea30fe 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -198,9 +198,9 @@ public:
static const FunctionInfo functions[] = {
{0, nullptr, "Initialize"},
{1, nullptr, "Finalize"},
- {2, nullptr, "GetState"},
- {3, nullptr, "IsNfcEnabled"},
- {100, nullptr, "SetNfcEnabled"},
+ {2, nullptr, "GetStateOld"},
+ {3, nullptr, "IsNfcEnabledOld"},
+ {100, nullptr, "SetNfcEnabledOld"},
{400, nullptr, "InitializeSystem"},
{401, nullptr, "FinalizeSystem"},
{402, nullptr, "GetState"},
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 767158444..01ddcdbd6 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -177,7 +177,8 @@ private:
void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_NIFM, "called");
- ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size");
+ ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c,
+ "SfNetworkProfileData is not the correct size");
u128 uuid{};
auto buffer = ctx.ReadBuffer();
std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5c..fba89e7a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
case IoctlCommand::IocGetCharacteristicsCommand:
return GetCharacteristics(input, output, output2, version);
case IoctlCommand::IocGetTPCMasksCommand:
- return GetTPCMasks(input, output);
+ return GetTPCMasks(input, output, output2, version);
case IoctlCommand::IocGetActiveSlotMaskCommand:
return GetActiveSlotMask(input, output);
case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
return 0;
}
-u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
+ std::vector<u8>& output2, IoctlVersion version) {
IoctlGpuGetTpcMasksArgs params{};
std::memcpy(&params, input.data(), input.size());
- LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
- params.mask_buf_addr);
- // TODO(ogniK): Confirm value on hardware
- if (params.mask_buf_size)
- params.tpc_mask_size = 4 * 1; // 4 * num_gpc
- else
- params.tpc_mask_size = 0;
- std::memcpy(output.data(), &params, sizeof(params));
+ LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
+ if (params.mask_buffer_size != 0) {
+ params.tcp_mask = 3;
+ }
+
+ if (version == IoctlVersion::Version3) {
+ std::memcpy(output.data(), input.data(), output.size());
+ std::memcpy(output2.data(), &params.tcp_mask, output2.size());
+ } else {
+ std::memcpy(output.data(), &params, output.size());
+ }
+
return 0;
}
@@ -195,8 +200,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
IoctlGetGpuTime params{};
std::memcpy(&params, input.data(), input.size());
- const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks());
- params.gpu_time = static_cast<u64_le>(ns.count());
+ params.gpu_time = static_cast<u64_le>(system.CoreTiming().GetGlobalTimeNs().count());
std::memcpy(output.data(), &params, output.size());
return 0;
}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 07b644ec5..ef60f72ce 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,16 +92,11 @@ private:
"IoctlCharacteristics is incorrect size");
struct IoctlGpuGetTpcMasksArgs {
- /// [in] TPC mask buffer size reserved by userspace. Should be at least
- /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
- /// [out] full kernel buffer size
- u32_le mask_buf_size;
- u32_le reserved;
-
- /// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
- /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
- u64_le mask_buf_addr;
- u64_le tpc_mask_size; // Nintendo add this?
+ u32_le mask_buffer_size{};
+ INSERT_PADDING_WORDS(1);
+ u64_le mask_buffer_address{};
+ u32_le tcp_mask{};
+ INSERT_PADDING_WORDS(1);
};
static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
"IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -166,7 +161,8 @@ private:
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
std::vector<u8>& output2, IoctlVersion version);
- u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
+ IoctlVersion version);
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index f1e3d832a..caca80dde 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -138,9 +138,7 @@ u32 BufferQueue::Query(QueryType type) {
switch (type) {
case QueryType::NativeWindowFormat:
- // TODO(Subv): Use an enum for this
- static constexpr u32 FormatABGR8 = 1;
- return FormatABGR8;
+ return static_cast<u32>(PixelFormat::RGBA8888);
}
UNIMPLEMENTED();
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index d5f31e567..8a837e5aa 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -66,6 +66,16 @@ public:
Rotate270 = 0x07,
};
+ enum class PixelFormat : u32 {
+ RGBA8888 = 1,
+ RGBX8888 = 2,
+ RGB888 = 3,
+ RGB565 = 4,
+ BGRA8888 = 5,
+ RGBA5551 = 6,
+ RRGBA4444 = 7,
+ };
+
struct Buffer {
enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 437bc5dee..2f44d3779 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
+#include "common/thread.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
@@ -27,8 +28,35 @@
namespace Service::NVFlinger {
-constexpr s64 frame_ticks = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
-constexpr s64 frame_ticks_30fps = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 30);
+// Frame periods for 60 and 30 frames per second, computed from 1,000,000,000 units per second
+// (presumably nanoseconds, matching the ns_late callback parameter - confirm).
+constexpr s64 frame_ticks = static_cast<s64>(1000000000 / 60);
+constexpr s64 frame_ticks_30fps = static_cast<s64>(1000000000 / 30);
+
+// Entry point handed to std::thread by the constructor; runs the SplitVSync() loop of the
+// given instance.
+void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
+ nv_flinger.SplitVSync();
+}
+
+// Loop executed on the dedicated VSync thread: composes a frame, then waits on wait_event
+// until the next frame is due. Runs until is_running becomes false.
+void NVFlinger::SplitVSync() {
+ system.RegisterHostThread();
+ std::string name = "yuzu:VSyncThread";
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ // Amount (ns) by which the previous iteration overran its planned wait.
+ s64 delay = 0;
+ while (is_running) {
+ // The mutex is locked and unlocked by hand here; Compose() itself unlocks and relocks
+ // guard around its fence waits.
+ guard->lock();
+ const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
+ Compose();
+ const auto ticks = GetNextTicks();
+ const s64 time_end = system.CoreTiming().GetGlobalTimeNs().count();
+ const s64 time_passed = time_end - time_start;
+ // Time left until the next frame after subtracting composition time and the previous
+ // overrun; clamped so it never goes negative.
+ const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
+ guard->unlock();
+ if (next_time > 0) {
+ wait_event->WaitFor(std::chrono::nanoseconds{next_time});
+ }
+ // Measure how much longer than next_time actually elapsed and carry it forward.
+ delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time;
+ }
+}
NVFlinger::NVFlinger(Core::System& system) : system(system) {
displays.emplace_back(0, "Default", system);
@@ -36,22 +64,36 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
displays.emplace_back(2, "Edid", system);
displays.emplace_back(3, "Internal", system);
displays.emplace_back(4, "Null", system);
+ guard = std::make_shared<std::mutex>();
// Schedule the screen composition events
composition_event =
- Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 cycles_late) {
+ Core::Timing::CreateEvent("ScreenComposition", [this](u64 userdata, s64 ns_late) {
+ // Keep the returned lock alive for the whole callback. A bare `Lock();` destroys the
+ // temporary std::unique_lock immediately, so Compose() would run without the mutex and
+ // its internal guard->unlock() would unlock a mutex this thread does not own.
+ const auto composition_lock = Lock();
Compose();
- const auto ticks =
- Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
- this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late),
+ const auto ticks = GetNextTicks();
+ this->system.CoreTiming().ScheduleEvent(std::max<s64>(0LL, ticks - ns_late),
composition_event);
});
-
- system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
+ if (system.IsMulticore()) {
+ is_running = true;
+ wait_event = std::make_unique<Common::Event>();
+ vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
+ } else {
+ system.CoreTiming().ScheduleEvent(frame_ticks, composition_event);
+ }
}
+// Stops frame composition: joins the VSync thread in multicore mode, otherwise unschedules
+// the composition timing event.
NVFlinger::~NVFlinger() {
- system.CoreTiming().UnscheduleEvent(composition_event, 0);
+ if (system.IsMulticore()) {
+ // Clear the run flag first, then signal wait_event so a thread blocked in WaitFor()
+ // wakes up and can observe it before we join.
+ is_running = false;
+ wait_event->Set();
+ vsync_thread->join();
+ vsync_thread.reset();
+ wait_event.reset();
+ } else {
+ system.CoreTiming().UnscheduleEvent(composition_event, 0);
+ }
}
void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
@@ -199,10 +241,12 @@ void NVFlinger::Compose() {
auto& gpu = system.GPU();
const auto& multi_fence = buffer->get().multi_fence;
+ guard->unlock();
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
gpu.WaitFence(fence.id, fence.value);
}
+ guard->lock();
MicroProfileFlip();
@@ -223,7 +267,7 @@ void NVFlinger::Compose() {
+// Returns the interval until the next composition: 1,000,000,000 scaled by 2^swap_interval
+// and divided by max_hertz.
s64 NVFlinger::GetNextTicks() const {
constexpr s64 max_hertz = 120LL;
- return (Core::Hardware::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
+ constexpr s64 units_per_second = 1000000000;
+ const s64 interval_scale = 1LL << swap_interval;
+ return (units_per_second * interval_scale) / max_hertz;
}
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 57a21f33b..e4959a9af 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,15 +4,22 @@
#pragma once
+#include <atomic>
#include <memory>
+#include <mutex>
#include <optional>
#include <string>
#include <string_view>
+#include <thread>
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
+namespace Common {
+class Event;
+} // namespace Common
+
namespace Core::Timing {
class CoreTiming;
struct EventType;
@@ -79,6 +86,10 @@ public:
s64 GetNextTicks() const;
+    /// Locks the NVFlinger mutex and returns the owning lock object.
+    /// The mutex is released as soon as the returned std::unique_lock is destroyed, so the
+    /// caller must store the result for as long as exclusive access is required; discarding
+    /// it unlocks immediately, which [[nodiscard]] lets the compiler diagnose.
+    [[nodiscard]] std::unique_lock<std::mutex> Lock() {
+        return std::unique_lock{*guard};
+    }
+
private:
/// Finds the display identified by the specified ID.
VI::Display* FindDisplay(u64 display_id);
@@ -92,6 +103,10 @@ private:
/// Finds the layer identified by the specified ID in the desired display.
const VI::Layer* FindLayer(u64 display_id, u64 layer_id) const;
+ static void VSyncThread(NVFlinger& nv_flinger);
+
+ void SplitVSync();
+
std::shared_ptr<Nvidia::Module> nvdrv;
std::vector<VI::Display> displays;
@@ -108,7 +123,13 @@ private:
/// Event that handles screen composition.
std::shared_ptr<Core::Timing::EventType> composition_event;
+ std::shared_ptr<std::mutex> guard;
+
Core::System& system;
+
+ std::unique_ptr<std::thread> vsync_thread;
+ std::unique_ptr<Common::Event> wait_event;
+ std::atomic<bool> is_running{};
};
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/prepo/prepo.cpp b/src/core/hle/service/prepo/prepo.cpp
index 14309c679..67833d9af 100644
--- a/src/core/hle/service/prepo/prepo.cpp
+++ b/src/core/hle/service/prepo/prepo.cpp
@@ -75,8 +75,13 @@ private:
const auto user_id = rp.PopRaw<u128>();
const auto process_id = rp.PopRaw<u64>();
std::vector<std::vector<u8>> data{ctx.ReadBuffer(0)};
+
if constexpr (Type == Core::Reporter::PlayReportType::Old2) {
- data.emplace_back(ctx.ReadBuffer(1));
+ const auto read_buffer_count =
+ ctx.BufferDescriptorX().size() + ctx.BufferDescriptorA().size();
+ if (read_buffer_count > 1) {
+ data.emplace_back(ctx.ReadBuffer(1));
+ }
}
LOG_DEBUG(
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index 6ada13be4..d872de16c 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -142,7 +142,7 @@ void SM::GetService(Kernel::HLERequestContext& ctx) {
}
// Wake the threads waiting on the ServerPort
- server_port->WakeupAllWaitingThreads();
+ server_port->Signal();
LOG_DEBUG(Service_SM, "called service={} -> session={}", name, client->GetObjectId());
IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles};
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp
index 1575f0b49..59a272f4a 100644
--- a/src/core/hle/service/time/standard_steady_clock_core.cpp
+++ b/src/core/hle/service/time/standard_steady_clock_core.cpp
@@ -11,9 +11,8 @@
namespace Service::Time::Clock {
TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) {
- const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
- Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Hardware::CNTFREQ)};
+ const TimeSpanType ticks_time_span{
+ TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds};
if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) {
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
index 44d5bc651..8baaa2a6a 100644
--- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp
+++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp
@@ -11,9 +11,8 @@
namespace Service::Time::Clock {
SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) {
- const TimeSpanType ticks_time_span{TimeSpanType::FromTicks(
- Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Hardware::CNTFREQ)};
+ const TimeSpanType ticks_time_span{
+ TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
return {ticks_time_span.ToSeconds(), GetClockSourceId()};
}
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp
index 67f1bbcf3..4cf58a61a 100644
--- a/src/core/hle/service/time/time.cpp
+++ b/src/core/hle/service/time/time.cpp
@@ -234,9 +234,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(Kernel::HLERe
const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)};
if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) {
- const auto ticks{Clock::TimeSpanType::FromTicks(
- Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Hardware::CNTFREQ)};
+ const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(),
+ Core::Hardware::CNTFREQ)};
const s64 base_time_point{context.offset + current_time_point.time_point -
ticks.ToSeconds()};
IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2};
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 9d6c55865..b4dfe45e5 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -5,6 +5,7 @@
#include <chrono>
#include <ctime>
+#include "common/time_zone.h"
#include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h"
#include "core/hle/service/time/local_system_clock_context_writer.h"
#include "core/hle/service/time/network_system_clock_context_writer.h"
@@ -21,8 +22,16 @@ static std::chrono::seconds GetSecondsSinceEpoch() {
Settings::values.custom_rtc_differential;
}
+// Returns the offset in seconds to add to the external RTC value for the selected time zone
+// setting.
+static s64 GetExternalTimeZoneOffset() {
+    // Only the "auto" setting follows the external system's time zone; every other setting
+    // contributes no offset.
+    const bool follows_external_zone = Settings::GetTimeZoneString() == "auto";
+    if (!follows_external_zone) {
+        return 0;
+    }
+    return Common::TimeZone::GetCurrentOffsetSeconds().count();
+}
+
+// Returns GetSecondsSinceEpoch() plus the external time zone offset from
+// GetExternalTimeZoneOffset().
static s64 GetExternalRtcValue() {
- return GetSecondsSinceEpoch().count();
+ return GetSecondsSinceEpoch().count() + GetExternalTimeZoneOffset();
}
TimeManager::TimeManager(Core::System& system)
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp
index 999ec1e51..e0ae9f874 100644
--- a/src/core/hle/service/time/time_sharedmemory.cpp
+++ b/src/core/hle/service/time/time_sharedmemory.cpp
@@ -30,8 +30,7 @@ void SharedMemory::SetupStandardSteadyClock(Core::System& system,
const Common::UUID& clock_source_id,
Clock::TimeSpanType current_time_point) {
const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks(
- Core::Timing::CpuCyclesToClockCycles(system.CoreTiming().GetTicks()),
- Core::Hardware::CNTFREQ)};
+ system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)};
const Clock::SteadyClockContext context{
static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds),
clock_source_id};
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 78d4acd95..c070d6e97 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -5,6 +5,7 @@
#include <sstream>
#include "common/logging/log.h"
+#include "common/time_zone.h"
#include "core/core.h"
#include "core/file_sys/content_archive.h"
#include "core/file_sys/nca_metadata.h"
@@ -14,6 +15,7 @@
#include "core/hle/service/filesystem/filesystem.h"
#include "core/hle/service/time/time_manager.h"
#include "core/hle/service/time/time_zone_content_manager.h"
+#include "core/settings.h"
namespace Service::Time::TimeZone {
@@ -68,10 +70,22 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system)
: system{system}, location_name_cache{BuildLocationNameCache(system)} {
- if (FileSys::VirtualFile vfs_file; GetTimeZoneInfoFile("GMT", vfs_file) == RESULT_SUCCESS) {
+
+    // Choose the zoneinfo entry to load. Both "auto" and "default" resolve to the default
+    // time zone; any other setting is used directly as the location name. (The "default"
+    // branch previously assigned location_name to itself, leaving it empty.)
+    std::string location_name;
+    const auto timezone_setting = Settings::GetTimeZoneString();
+    if (timezone_setting == "auto" || timezone_setting == "default") {
+        location_name = Common::TimeZone::GetDefaultTimeZone();
+    } else {
+        location_name = timezone_setting;
+    }
+
+ if (FileSys::VirtualFile vfs_file;
+ GetTimeZoneInfoFile(location_name, vfs_file) == RESULT_SUCCESS) {
const auto time_point{
time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)};
- time_manager.SetupTimeZoneManager("GMT", time_point, location_name_cache.size(), {},
+ time_manager.SetupTimeZoneManager(location_name, time_point, location_name_cache.size(), {},
vfs_file);
} else {
time_zone_manager.MarkAsInitialized();
@@ -114,6 +128,12 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati
vfs_file = zoneinfo_dir->GetFile(location_name);
if (!vfs_file) {
+ LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! Using default timezone.",
+ time_zone_binary_titleid, location_name);
+ vfs_file = zoneinfo_dir->GetFile(Common::TimeZone::GetDefaultTimeZone());
+ }
+
+ if (!vfs_file) {
LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"!", time_zone_binary_titleid,
location_name);
return ERROR_TIME_NOT_FOUND;
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 46e14c2a3..157092074 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -511,6 +511,7 @@ private:
LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
static_cast<u32>(transaction), flags);
+ nv_flinger->Lock();
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
switch (transaction) {
@@ -550,6 +551,7 @@ private:
[=](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
Kernel::ThreadWakeupReason reason) {
// Repeat TransactParcel DequeueBuffer when a buffer is available
+ nv_flinger->Lock();
auto& buffer_queue = nv_flinger->FindBufferQueue(id);
auto result = buffer_queue.DequeueBuffer(width, height);
ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 9d87045a0..7def00768 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -8,6 +8,7 @@
#include <utility>
#include "common/assert.h"
+#include "common/atomic_ops.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/page_table.h"
@@ -29,15 +30,12 @@ namespace Core::Memory {
struct Memory::Impl {
explicit Impl(Core::System& system_) : system{system_} {}
+ // Makes the given process' page table the current one and notifies the ARM interface of
+ // core `core_id` (only that core) about the change.
- void SetCurrentPageTable(Kernel::Process& process) {
+ void SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
current_page_table = &process.PageTable().PageTableImpl();
const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth();
- system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width);
- system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width);
- system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width);
- system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
+ system.ArmInterface(core_id).PageTableChanged(*current_page_table, address_space_width);
}
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
@@ -179,6 +177,22 @@ struct Memory::Impl {
}
}
+ // 8-bit compare-and-swap at addr; forwards to WriteExclusive<u8>.
+ bool WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
+ return WriteExclusive<u8>(addr, data, expected);
+ }
+
+ // 16-bit compare-and-swap at addr; forwards to WriteExclusive<u16_le>.
+ bool WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
+ return WriteExclusive<u16_le>(addr, data, expected);
+ }
+
+ // 32-bit compare-and-swap at addr; forwards to WriteExclusive<u32_le>.
+ bool WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
+ return WriteExclusive<u32_le>(addr, data, expected);
+ }
+
+ // 64-bit compare-and-swap at addr; forwards to WriteExclusive<u64_le>.
+ bool WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
+ return WriteExclusive<u64_le>(addr, data, expected);
+ }
+
std::string ReadCString(VAddr vaddr, std::size_t max_length) {
std::string string;
string.reserve(max_length);
@@ -682,6 +696,67 @@ struct Memory::Impl {
}
}
+    /**
+     * Stores `data` at guest address `vaddr` through Common::AtomicCompareAndSwap, using
+     * `expected` as the comparison value.
+     *
+     * @param vaddr    Guest virtual address to operate on.
+     * @param data     Value to store.
+     * @param expected Value passed to the compare-and-swap as the expected current contents.
+     *
+     * @returns The result of Common::AtomicCompareAndSwap for memory that has a host
+     *          pointer. Returns true for unmapped addresses (the write is dropped and
+     *          logged) and after the assertion/unreachable paths.
+     */
+    template <typename T>
+    bool WriteExclusive(const VAddr vaddr, const T data, const T expected) {
+        u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
+        if (page_pointer != nullptr) {
+            // NOTE: Avoid adding any extra logic to this fast-path block
+            T volatile* pointer = reinterpret_cast<T volatile*>(&page_pointer[vaddr]);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+
+        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        switch (type) {
+        case Common::PageType::Unmapped:
+            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
+                      static_cast<u32>(data), vaddr);
+            return true;
+        case Common::PageType::Memory:
+            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
+            break;
+        case Common::PageType::RasterizerCachedMemory: {
+            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
+            // Swap the memory host_ptr points at. Casting &host_ptr would instead operate on
+            // the local pointer variable and never touch guest memory.
+            T volatile* pointer = reinterpret_cast<T volatile*>(host_ptr);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        default:
+            UNREACHABLE();
+        }
+        return true;
+    }
+
+    /**
+     * 128-bit variant of WriteExclusive: stores `data` at guest address `vaddr` through
+     * Common::AtomicCompareAndSwap, using `expected` as the comparison value.
+     *
+     * @param vaddr    Guest virtual address to operate on.
+     * @param data     128-bit value to store.
+     * @param expected 128-bit value passed to the compare-and-swap as the expected contents.
+     *
+     * @returns The result of Common::AtomicCompareAndSwap for memory that has a host
+     *          pointer. Returns true for unmapped addresses (the write is dropped and
+     *          logged) and after the assertion/unreachable paths.
+     */
+    bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) {
+        u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
+        if (page_pointer != nullptr) {
+            // NOTE: Avoid adding any extra logic to this fast-path block
+            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(&page_pointer[vaddr]);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+
+        const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
+        switch (type) {
+        case Common::PageType::Unmapped:
+            // Both data halves are printed as 16 hex digits each (high word first), followed
+            // by the address; the previous format string had these fields misaligned.
+            LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:016X}{:016X} @ 0x{:016X}",
+                      sizeof(data) * 8, static_cast<u64>(data[1]), static_cast<u64>(data[0]),
+                      vaddr);
+            return true;
+        case Common::PageType::Memory:
+            ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
+            break;
+        case Common::PageType::RasterizerCachedMemory: {
+            u8* host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
+            system.GPU().InvalidateRegion(vaddr, sizeof(u128));
+            // Swap the memory host_ptr points at. Casting &host_ptr would instead operate on
+            // the local pointer variable and never touch guest memory.
+            u64 volatile* pointer = reinterpret_cast<u64 volatile*>(host_ptr);
+            return Common::AtomicCompareAndSwap(pointer, data, expected);
+        }
+        default:
+            UNREACHABLE();
+        }
+        return true;
+    }
+
Common::PageTable* current_page_table = nullptr;
Core::System& system;
};
@@ -689,8 +764,8 @@ struct Memory::Impl {
Memory::Memory(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
Memory::~Memory() = default;
+// Forwards to the implementation object, passing along the core to notify.
-void Memory::SetCurrentPageTable(Kernel::Process& process) {
- impl->SetCurrentPageTable(process);
+void Memory::SetCurrentPageTable(Kernel::Process& process, u32 core_id) {
+ impl->SetCurrentPageTable(process, core_id);
}
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
@@ -764,6 +839,26 @@ void Memory::Write64(VAddr addr, u64 data) {
impl->Write64(addr, data);
}
+// Forwards the 8-bit exclusive write to the implementation object.
+bool Memory::WriteExclusive8(VAddr addr, u8 data, u8 expected) {
+ return impl->WriteExclusive8(addr, data, expected);
+}
+
+// Forwards the 16-bit exclusive write to the implementation object.
+bool Memory::WriteExclusive16(VAddr addr, u16 data, u16 expected) {
+ return impl->WriteExclusive16(addr, data, expected);
+}
+
+// Forwards the 32-bit exclusive write to the implementation object.
+bool Memory::WriteExclusive32(VAddr addr, u32 data, u32 expected) {
+ return impl->WriteExclusive32(addr, data, expected);
+}
+
+// Forwards the 64-bit exclusive write to the implementation object.
+bool Memory::WriteExclusive64(VAddr addr, u64 data, u64 expected) {
+ return impl->WriteExclusive64(addr, data, expected);
+}
+
+// Forwards the 128-bit exclusive write to the implementation object.
+bool Memory::WriteExclusive128(VAddr addr, u128 data, u128 expected) {
+ return impl->WriteExclusive128(addr, data, expected);
+}
+
std::string Memory::ReadCString(VAddr vaddr, std::size_t max_length) {
return impl->ReadCString(vaddr, max_length);
}
diff --git a/src/core/memory.h b/src/core/memory.h
index 9292f3b0a..4a1cc63f4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -64,7 +64,7 @@ public:
*
* @param process The process to use the page table of.
+ * @param core_id The index of the core whose ARM interface is notified of the change.
*/
- void SetCurrentPageTable(Kernel::Process& process);
+ void SetCurrentPageTable(Kernel::Process& process, u32 core_id);
/**
* Maps an allocated buffer onto a region of the emulated process address space.
@@ -245,6 +245,71 @@ public:
void Write64(VAddr addr, u64 data);
/**
+ * Writes an 8-bit unsigned integer to the given virtual address in
+ * the current process' address space if and only if the address contains
+ * the expected value. This operation is atomic.
+ *
+ * @param addr The virtual address to write the 8-bit unsigned integer to.
+ * @param data The 8-bit unsigned integer to write to the given virtual address.
+ * @param expected The 8-bit unsigned integer to check against the given virtual address.
+ *
+ * @post If the address contained the expected value, the memory range
+ *       [addr, addr + sizeof(data)) contains the given data value.
+ */
+ bool WriteExclusive8(VAddr addr, u8 data, u8 expected);
+
+ /**
+ * Writes a 16-bit unsigned integer to the given virtual address in
+ * the current process' address space if and only if the address contains
+ * the expected value. This operation is atomic.
+ *
+ * @param addr The virtual address to write the 16-bit unsigned integer to.
+ * @param data The 16-bit unsigned integer to write to the given virtual address.
+ * @param expected The 16-bit unsigned integer to check against the given virtual address.
+ *
+ * @post If the address contained the expected value, the memory range
+ *       [addr, addr + sizeof(data)) contains the given data value.
+ */
+ bool WriteExclusive16(VAddr addr, u16 data, u16 expected);
+
+ /**
+ * Writes a 32-bit unsigned integer to the given virtual address in
+ * the current process' address space if and only if the address contains
+ * the expected value. This operation is atomic.
+ *
+ * @param addr The virtual address to write the 32-bit unsigned integer to.
+ * @param data The 32-bit unsigned integer to write to the given virtual address.
+ * @param expected The 32-bit unsigned integer to check against the given virtual address.
+ *
+ * @post If the address contained the expected value, the memory range
+ *       [addr, addr + sizeof(data)) contains the given data value.
+ */
+ bool WriteExclusive32(VAddr addr, u32 data, u32 expected);
+
+ /**
+ * Writes a 64-bit unsigned integer to the given virtual address in
+ * the current process' address space if and only if the address contains
+ * the expected value. This operation is atomic.
+ *
+ * @param addr The virtual address to write the 64-bit unsigned integer to.
+ * @param data The 64-bit unsigned integer to write to the given virtual address.
+ * @param expected The 64-bit unsigned integer to check against the given virtual address.
+ *
+ * @post If the address contained the expected value, the memory range
+ *       [addr, addr + sizeof(data)) contains the given data value.
+ */
+ bool WriteExclusive64(VAddr addr, u64 data, u64 expected);
+
+ /**
+ * Writes a 128-bit unsigned integer to the given virtual address in
+ * the current process' address space if and only if the address contains
+ * the expected value. This operation is atomic.
+ *
+ * @param addr The virtual address to write the 128-bit unsigned integer to.
+ * @param data The 128-bit unsigned integer to write to the given virtual address.
+ * @param expected The 128-bit unsigned integer to check against the given virtual address.
+ *
+ * @post If the address contained the expected value, the memory range
+ *       [addr, addr + sizeof(data)) contains the given data value.
+ */
+ bool WriteExclusive128(VAddr addr, u128 data, u128 expected);
+
+ /**
* Reads a null-terminated string from the given virtual address.
* This function will continually read characters until either:
*
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index b139e8465..53d27859b 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -20,7 +20,7 @@
namespace Core::Memory {
-constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 12);
+constexpr s64 CHEAT_ENGINE_TICKS = static_cast<s64>(1000000000 / 12);
constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
StandardVmCallbacks::StandardVmCallbacks(Core::System& system, const CheatProcessMetadata& metadata)
@@ -190,7 +190,7 @@ CheatEngine::~CheatEngine() {
void CheatEngine::Initialize() {
event = Core::Timing::CreateEvent(
"CheatEngine::FrameCallback::" + Common::HexToString(metadata.main_nso_build_id),
- [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+ [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS, event);
metadata.process_id = system.CurrentProcess()->GetProcessID();
@@ -217,7 +217,7 @@ void CheatEngine::Reload(std::vector<CheatEntry> cheats) {
MICROPROFILE_DEFINE(Cheat_Engine, "Add-Ons", "Cheat Engine", MP_RGB(70, 200, 70));
-void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
+void CheatEngine::FrameCallback(u64 userdata, s64 ns_late) {
if (is_pending_reload.exchange(false)) {
vm.LoadProgram(cheats);
}
@@ -230,7 +230,7 @@ void CheatEngine::FrameCallback(u64 userdata, s64 cycles_late) {
vm.Execute(metadata);
- core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - cycles_late, event);
+ core_timing.ScheduleEvent(CHEAT_ENGINE_TICKS - ns_late, event);
}
} // namespace Core::Memory
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index f1ae9d4df..9f3a6b811 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -119,7 +119,7 @@ double PerfStats::GetLastFrameTimeScale() {
}
void FrameLimiter::DoFrameLimiting(microseconds current_system_time_us) {
- if (!Settings::values.use_frame_limit) {
+ if (!Settings::values.use_frame_limit || Settings::values.use_multi_core) {
return;
}
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index 558cbe6d7..76cfa5a17 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -4,11 +4,12 @@
#include <ctime>
#include <fstream>
+#include <iomanip>
#include <fmt/chrono.h>
#include <fmt/format.h>
#include <fmt/ostream.h>
-#include <json.hpp>
+#include <nlohmann/json.hpp>
#include "common/file_util.h"
#include "common/hex_util.h"
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2b0bdc4d3..56df5e925 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -63,6 +63,21 @@ const std::array<const char*, NumMouseButtons> mapping = {{
Values values = {};
+// Maps Settings::values.time_zone_index to its time zone name. The first two entries,
+// "auto" and "default", are special selections rather than zone names.
+std::string GetTimeZoneString() {
+    static constexpr std::array<const char*, 46> timezones{{
+        "auto",      "default",  "CET", "CST6CDT", "Cuba",    "EET",    "Egypt",     "Eire",
+        "EST",       "EST5EDT",  "GB",  "GB-Eire", "GMT",     "GMT+0",  "GMT-0",     "GMT0",
+        "Greenwich", "Hongkong", "HST", "Iceland", "Iran",    "Israel", "Jamaica",   "Japan",
+        "Kwajalein", "Libya",    "MET", "MST",     "MST7MDT", "Navajo", "NZ",        "NZ-CHAT",
+        "Poland",    "Portugal", "PRC", "PST8PDT", "ROC",     "ROK",    "Singapore", "Turkey",
+        "UCT",       "Universal", "UTC", "W-SU",   "WET",     "Zulu",
+    }};
+
+    // The stored index must refer to an entry of the table above.
+    ASSERT(Settings::values.time_zone_index < timezones.size());
+
+    const char* const selected_zone = timezones[Settings::values.time_zone_index];
+    return selected_zone;
+}
+
void Apply() {
GDBStub::SetServerPort(values.gdbstub_port);
GDBStub::ToggleServer(values.use_gdbstub);
@@ -87,6 +102,7 @@ void LogSettings() {
LogSetting("System_CurrentUser", Settings::values.current_user);
LogSetting("System_LanguageIndex", Settings::values.language_index);
LogSetting("System_RegionIndex", Settings::values.region_index);
+ LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index);
LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
@@ -96,6 +112,7 @@ void LogSettings() {
LogSetting("Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
+ LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
@@ -110,6 +127,13 @@ void LogSettings() {
LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
}
+// Returns the volume to apply: 0 while audio is muted, otherwise the configured volume.
+float Volume() {
+    return values.audio_muted ? 0.0f : values.volume;
+}
+
bool IsGPULevelExtreme() {
return values.gpu_accuracy == GPUAccuracy::Extreme;
}
diff --git a/src/core/settings.h b/src/core/settings.h
index 163900f0b..a598ccbc1 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -394,6 +394,7 @@ struct Values {
s32 current_user;
s32 language_index;
s32 region_index;
+ s32 time_zone_index;
s32 sound_index;
// Controls
@@ -436,7 +437,7 @@ struct Values {
bool renderer_debug;
int vulkan_device;
- float resolution_factor;
+ u16 resolution_factor{1};
int aspect_ratio;
int max_anisotropy;
bool use_frame_limit;
@@ -445,6 +446,7 @@ struct Values {
GPUAccuracy gpu_accuracy;
bool use_asynchronous_gpu_emulation;
bool use_vsync;
+ bool use_assembly_shaders;
bool force_30fps_mode;
bool use_fast_gpu_time;
@@ -457,6 +459,7 @@ struct Values {
bool use_dev_keys;
// Audio
+ bool audio_muted;
std::string sink_id;
bool enable_audio_stretching;
std::string audio_device_id;
@@ -472,6 +475,7 @@ struct Values {
bool reporting_services;
bool quest_flag;
bool disable_cpu_opt;
+ bool disable_macro_jit;
// BCAT
std::string bcat_backend;
@@ -487,9 +491,14 @@ struct Values {
std::map<u64, std::vector<std::string>> disabled_addons;
} extern values;
+float Volume();
+
bool IsGPULevelExtreme();
bool IsGPULevelHigh();
+std::string GetTimeZoneString();
+
void Apply();
void LogSettings();
+
} // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 1c3b03a1c..c781b3cfc 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation);
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
+ AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
}
diff --git a/src/core/tools/freezer.cpp b/src/core/tools/freezer.cpp
index b2c6c537e..8b0c50d11 100644
--- a/src/core/tools/freezer.cpp
+++ b/src/core/tools/freezer.cpp
@@ -14,7 +14,7 @@
namespace Tools {
namespace {
-constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(Core::Hardware::BASE_CLOCK_RATE / 60);
+constexpr s64 MEMORY_FREEZER_TICKS = static_cast<s64>(1000000000 / 60);
u64 MemoryReadWidth(Core::Memory::Memory& memory, u32 width, VAddr addr) {
switch (width) {
@@ -57,7 +57,7 @@ Freezer::Freezer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& m
: core_timing{core_timing_}, memory{memory_} {
event = Core::Timing::CreateEvent(
"MemoryFreezer::FrameCallback",
- [this](u64 userdata, s64 cycles_late) { FrameCallback(userdata, cycles_late); });
+ [this](u64 userdata, s64 ns_late) { FrameCallback(userdata, ns_late); });
core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS, event);
}
@@ -158,7 +158,7 @@ std::vector<Freezer::Entry> Freezer::GetEntries() const {
return entries;
}
-void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
+void Freezer::FrameCallback(u64 userdata, s64 ns_late) {
if (!IsActive()) {
LOG_DEBUG(Common_Memory, "Memory freezer has been deactivated, ending callback events.");
return;
@@ -173,7 +173,7 @@ void Freezer::FrameCallback(u64 userdata, s64 cycles_late) {
MemoryWriteWidth(memory, entry.width, entry.address, entry.value);
}
- core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - cycles_late, event);
+ core_timing.ScheduleEvent(MEMORY_FREEZER_TICKS - ns_late, event);
}
void Freezer::FillEntryReads() {
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 078374be5..afb8e6612 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage
int key_code = params.Get("code", 0);
std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
key_button_list->AddKeyButton(key_code, button.get());
- return std::move(button);
+ return button;
}
void Keyboard::PressKey(int key_code) {
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 868251628..d4cdf76a3 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag
// Previously created device is disconnected here. Having two motion devices for 3DS is not
// expected.
current_device = device_wrapper->device;
- return std::move(device_wrapper);
+ return device_wrapper;
}
void MotionEmu::BeginTilt(int x, int y) {
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index c7038b217..47ef30aa9 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,6 +1,7 @@
add_executable(tests
common/bit_field.cpp
common/bit_utils.cpp
+ common/fibers.cpp
common/multi_level_queue.cpp
common/param_package.cpp
common/ring_buffer.cpp
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
new file mode 100644
index 000000000..4fd92428f
--- /dev/null
+++ b/src/tests/common/fibers.cpp
@@ -0,0 +1,358 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+#include <catch2/catch.hpp>
+#include <math.h>
+#include "common/common_types.h"
+#include "common/fiber.h"
+#include "common/spin_lock.h"
+
+namespace Common {
+
+class TestControl1 {
+public:
+ TestControl1() = default;
+
+ void DoWork();
+
+ void ExecuteThread(u32 id);
+
+ std::unordered_map<std::thread::id, u32> ids;
+ std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+ std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
+ std::vector<u32> items;
+ std::vector<u32> results;
+};
+
+static void WorkControl1(void* control) {
+ auto* test_control = static_cast<TestControl1*>(control);
+ test_control->DoWork();
+}
+
+void TestControl1::DoWork() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ u32 value = items[id];
+ for (u32 i = 0; i < id; i++) {
+ value++;
+ }
+ results[id] = value;
+ Fiber::YieldTo(work_fibers[id], thread_fibers[id]);
+}
+
+void TestControl1::ExecuteThread(u32 id) {
+ std::thread::id this_id = std::this_thread::get_id();
+ ids[this_id] = id;
+ auto thread_fiber = Fiber::ThreadToFiber();
+ thread_fibers[id] = thread_fiber;
+ work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
+ items[id] = rand() % 256;
+ Fiber::YieldTo(thread_fibers[id], work_fibers[id]);
+ thread_fibers[id]->Exit();
+}
+
+static void ThreadStart1(u32 id, TestControl1& test_control) {
+ test_control.ExecuteThread(id);
+}
+
+/** This test checks for fiber setup configuration and validates that fibers are
+ * doing all the work required.
+ */
+TEST_CASE("Fibers::Setup", "[common]") {
+ constexpr std::size_t num_threads = 7;
+ TestControl1 test_control{};
+ test_control.thread_fibers.resize(num_threads);
+ test_control.work_fibers.resize(num_threads);
+ test_control.items.resize(num_threads, 0);
+ test_control.results.resize(num_threads, 0);
+ std::vector<std::thread> threads;
+ for (u32 i = 0; i < num_threads; i++) {
+ threads.emplace_back(ThreadStart1, i, std::ref(test_control));
+ }
+ for (u32 i = 0; i < num_threads; i++) {
+ threads[i].join();
+ }
+ for (u32 i = 0; i < num_threads; i++) {
+ REQUIRE(test_control.items[i] + i == test_control.results[i]);
+ }
+}
+
+class TestControl2 {
+public:
+ TestControl2() = default;
+
+ void DoWork1() {
+ trap2 = false;
+ while (trap.load())
+ ;
+ for (u32 i = 0; i < 12000; i++) {
+ value1 += i;
+ }
+ Fiber::YieldTo(fiber1, fiber3);
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ assert1 = id == 1;
+ value2 += 5000;
+ Fiber::YieldTo(fiber1, thread_fibers[id]);
+ }
+
+ void DoWork2() {
+ while (trap2.load())
+ ;
+ value2 = 2000;
+ trap = false;
+ Fiber::YieldTo(fiber2, fiber1);
+ assert3 = false;
+ }
+
+ void DoWork3() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ assert2 = id == 0;
+ value1 += 1000;
+ Fiber::YieldTo(fiber3, thread_fibers[id]);
+ }
+
+ void ExecuteThread(u32 id);
+
+ void CallFiber1() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ Fiber::YieldTo(thread_fibers[id], fiber1);
+ }
+
+ void CallFiber2() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ Fiber::YieldTo(thread_fibers[id], fiber2);
+ }
+
+ void Exit();
+
+ bool assert1{};
+ bool assert2{};
+ bool assert3{true};
+ u32 value1{};
+ u32 value2{};
+ std::atomic<bool> trap{true};
+ std::atomic<bool> trap2{true};
+ std::unordered_map<std::thread::id, u32> ids;
+ std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+ std::shared_ptr<Common::Fiber> fiber1;
+ std::shared_ptr<Common::Fiber> fiber2;
+ std::shared_ptr<Common::Fiber> fiber3;
+};
+
+static void WorkControl2_1(void* control) {
+ auto* test_control = static_cast<TestControl2*>(control);
+ test_control->DoWork1();
+}
+
+static void WorkControl2_2(void* control) {
+ auto* test_control = static_cast<TestControl2*>(control);
+ test_control->DoWork2();
+}
+
+static void WorkControl2_3(void* control) {
+ auto* test_control = static_cast<TestControl2*>(control);
+ test_control->DoWork3();
+}
+
+void TestControl2::ExecuteThread(u32 id) {
+ std::thread::id this_id = std::this_thread::get_id();
+ ids[this_id] = id;
+ auto thread_fiber = Fiber::ThreadToFiber();
+ thread_fibers[id] = thread_fiber;
+}
+
+void TestControl2::Exit() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ thread_fibers[id]->Exit();
+}
+
+static void ThreadStart2_1(u32 id, TestControl2& test_control) {
+ test_control.ExecuteThread(id);
+ test_control.CallFiber1();
+ test_control.Exit();
+}
+
+static void ThreadStart2_2(u32 id, TestControl2& test_control) {
+ test_control.ExecuteThread(id);
+ test_control.CallFiber2();
+ test_control.Exit();
+}
+
+/** This test checks for fiber thread exchange configuration and validates
+ * that a fiber has been successfully transferred from one thread to another and that the TLS
+ * region of the thread is kept while changing fibers.
+ */
+TEST_CASE("Fibers::InterExchange", "[common]") {
+ TestControl2 test_control{};
+ test_control.thread_fibers.resize(2);
+ test_control.fiber1 =
+ std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
+ test_control.fiber2 =
+ std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
+ test_control.fiber3 =
+ std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
+ std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
+ std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
+ thread1.join();
+ thread2.join();
+ REQUIRE(test_control.assert1);
+ REQUIRE(test_control.assert2);
+ REQUIRE(test_control.assert3);
+ REQUIRE(test_control.value2 == 7000);
+ u32 cal_value = 0;
+ for (u32 i = 0; i < 12000; i++) {
+ cal_value += i;
+ }
+ cal_value += 1000;
+ REQUIRE(test_control.value1 == cal_value);
+}
+
+class TestControl3 {
+public:
+ TestControl3() = default;
+
+ void DoWork1() {
+ value1 += 1;
+ Fiber::YieldTo(fiber1, fiber2);
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ value3 += 1;
+ Fiber::YieldTo(fiber1, thread_fibers[id]);
+ }
+
+ void DoWork2() {
+ value2 += 1;
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ Fiber::YieldTo(fiber2, thread_fibers[id]);
+ }
+
+ void ExecuteThread(u32 id);
+
+ void CallFiber1() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ Fiber::YieldTo(thread_fibers[id], fiber1);
+ }
+
+ void Exit();
+
+ u32 value1{};
+ u32 value2{};
+ u32 value3{};
+ std::unordered_map<std::thread::id, u32> ids;
+ std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
+ std::shared_ptr<Common::Fiber> fiber1;
+ std::shared_ptr<Common::Fiber> fiber2;
+};
+
+static void WorkControl3_1(void* control) {
+ auto* test_control = static_cast<TestControl3*>(control);
+ test_control->DoWork1();
+}
+
+static void WorkControl3_2(void* control) {
+ auto* test_control = static_cast<TestControl3*>(control);
+ test_control->DoWork2();
+}
+
+void TestControl3::ExecuteThread(u32 id) {
+ std::thread::id this_id = std::this_thread::get_id();
+ ids[this_id] = id;
+ auto thread_fiber = Fiber::ThreadToFiber();
+ thread_fibers[id] = thread_fiber;
+}
+
+void TestControl3::Exit() {
+ std::thread::id this_id = std::this_thread::get_id();
+ u32 id = ids[this_id];
+ thread_fibers[id]->Exit();
+}
+
+static void ThreadStart3(u32 id, TestControl3& test_control) {
+ test_control.ExecuteThread(id);
+ test_control.CallFiber1();
+ test_control.Exit();
+}
+
+/** This test checks for two threads racing to start the same fiber.
+ * It checks that execution occurred in an ordered manner and that at no time there were
+ * two contexts at the same time.
+ */
+TEST_CASE("Fibers::StartRace", "[common]") {
+ TestControl3 test_control{};
+ test_control.thread_fibers.resize(2);
+ test_control.fiber1 =
+ std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
+ test_control.fiber2 =
+ std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
+ std::thread thread1(ThreadStart3, 0, std::ref(test_control));
+ std::thread thread2(ThreadStart3, 1, std::ref(test_control));
+ thread1.join();
+ thread2.join();
+ REQUIRE(test_control.value1 == 1);
+ REQUIRE(test_control.value2 == 1);
+ REQUIRE(test_control.value3 == 1);
+}
+
+class TestControl4;
+
+static void WorkControl4(void* control);
+
+class TestControl4 {
+public:
+ TestControl4() {
+ fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this);
+ goal_reached = false;
+ rewinded = false;
+ }
+
+ void Execute() {
+ thread_fiber = Fiber::ThreadToFiber();
+ Fiber::YieldTo(thread_fiber, fiber1);
+ thread_fiber->Exit();
+ }
+
+ void DoWork() {
+ fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this);
+ if (rewinded) {
+ goal_reached = true;
+ Fiber::YieldTo(fiber1, thread_fiber);
+ }
+ rewinded = true;
+ fiber1->Rewind();
+ }
+
+ std::shared_ptr<Common::Fiber> fiber1;
+ std::shared_ptr<Common::Fiber> thread_fiber;
+ bool goal_reached;
+ bool rewinded;
+};
+
+static void WorkControl4(void* control) {
+ auto* test_control = static_cast<TestControl4*>(control);
+ test_control->DoWork();
+}
+
+TEST_CASE("Fibers::Rewind", "[common]") {
+ TestControl4 test_control{};
+ test_control.Execute();
+ REQUIRE(test_control.goal_reached);
+ REQUIRE(test_control.rewinded);
+}
+
+} // namespace Common
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index ff2d11cc8..e66db1940 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -18,29 +18,26 @@ namespace {
// Numbers are chosen randomly to make sure the correct one is given.
constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
+constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
+std::array<s64, 5> delays{};
std::bitset<CB_IDS.size()> callbacks_ran_flags;
u64 expected_callback = 0;
-s64 lateness = 0;
template <unsigned int IDX>
-void CallbackTemplate(u64 userdata, s64 cycles_late) {
+void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
static_assert(IDX < CB_IDS.size(), "IDX out of range");
callbacks_ran_flags.set(IDX);
REQUIRE(CB_IDS[IDX] == userdata);
- REQUIRE(CB_IDS[IDX] == expected_callback);
- REQUIRE(lateness == cycles_late);
-}
-
-u64 callbacks_done = 0;
-
-void EmptyCallback(u64 userdata, s64 cycles_late) {
- ++callbacks_done;
+ REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
+ delays[IDX] = nanoseconds_late;
+ ++expected_callback;
}
struct ScopeInit final {
ScopeInit() {
- core_timing.Initialize();
+ core_timing.SetMulticore(true);
+ core_timing.Initialize([]() {});
}
~ScopeInit() {
core_timing.Shutdown();
@@ -49,110 +46,101 @@ struct ScopeInit final {
Core::Timing::CoreTiming core_timing;
};
-void AdvanceAndCheck(Core::Timing::CoreTiming& core_timing, u32 idx, u32 context = 0,
- int expected_lateness = 0, int cpu_downcount = 0) {
- callbacks_ran_flags = 0;
- expected_callback = CB_IDS[idx];
- lateness = expected_lateness;
-
- // Pretend we executed X cycles of instructions.
- core_timing.SwitchContext(context);
- core_timing.AddTicks(core_timing.GetDowncount() - cpu_downcount);
- core_timing.Advance();
- core_timing.SwitchContext((context + 1) % 4);
+#pragma optimize("", off)
- REQUIRE(decltype(callbacks_ran_flags)().set(idx) == callbacks_ran_flags);
+u64 TestTimerSpeed(Core::Timing::CoreTiming& core_timing) {
+ u64 start = core_timing.GetGlobalTimeNs().count();
+ u64 placebo = 0;
+ for (std::size_t i = 0; i < 1000; i++) {
+ placebo += core_timing.GetGlobalTimeNs().count();
+ }
+ u64 end = core_timing.GetGlobalTimeNs().count();
+ return (end - start);
}
+
+#pragma optimize("", on)
+
} // Anonymous namespace
TEST_CASE("CoreTiming[BasicOrder]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
+ std::vector<std::shared_ptr<Core::Timing::EventType>> events{
+ Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
+ Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
+ Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
+ Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
+ Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
+ };
+
+ expected_callback = 0;
+
+ core_timing.SyncPause(true);
+
+ u64 one_micro = 1000U;
+ for (std::size_t i = 0; i < events.size(); i++) {
+ u64 order = calls_order[i];
+ core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+ }
+ /// test pause
+ REQUIRE(callbacks_ran_flags.none());
- std::shared_ptr<Core::Timing::EventType> cb_a =
- Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
- std::shared_ptr<Core::Timing::EventType> cb_b =
- Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
- std::shared_ptr<Core::Timing::EventType> cb_c =
- Core::Timing::CreateEvent("callbackC", CallbackTemplate<2>);
- std::shared_ptr<Core::Timing::EventType> cb_d =
- Core::Timing::CreateEvent("callbackD", CallbackTemplate<3>);
- std::shared_ptr<Core::Timing::EventType> cb_e =
- Core::Timing::CreateEvent("callbackE", CallbackTemplate<4>);
-
- // Enter slice 0
- core_timing.ResetRun();
-
- // D -> B -> C -> A -> E
- core_timing.SwitchContext(0);
- core_timing.ScheduleEvent(1000, cb_a, CB_IDS[0]);
- REQUIRE(1000 == core_timing.GetDowncount());
- core_timing.ScheduleEvent(500, cb_b, CB_IDS[1]);
- REQUIRE(500 == core_timing.GetDowncount());
- core_timing.ScheduleEvent(800, cb_c, CB_IDS[2]);
- REQUIRE(500 == core_timing.GetDowncount());
- core_timing.ScheduleEvent(100, cb_d, CB_IDS[3]);
- REQUIRE(100 == core_timing.GetDowncount());
- core_timing.ScheduleEvent(1200, cb_e, CB_IDS[4]);
- REQUIRE(100 == core_timing.GetDowncount());
-
- AdvanceAndCheck(core_timing, 3, 0);
- AdvanceAndCheck(core_timing, 1, 1);
- AdvanceAndCheck(core_timing, 2, 2);
- AdvanceAndCheck(core_timing, 0, 3);
- AdvanceAndCheck(core_timing, 4, 0);
-}
-
-TEST_CASE("CoreTiming[FairSharing]", "[core]") {
+ core_timing.Pause(false); // No need to sync
- ScopeInit guard;
- auto& core_timing = guard.core_timing;
+ while (core_timing.HasPendingEvents())
+ ;
- std::shared_ptr<Core::Timing::EventType> empty_callback =
- Core::Timing::CreateEvent("empty_callback", EmptyCallback);
+ REQUIRE(callbacks_ran_flags.all());
- callbacks_done = 0;
- u64 MAX_CALLBACKS = 10;
- for (std::size_t i = 0; i < 10; i++) {
- core_timing.ScheduleEvent(i * 3333U, empty_callback, 0);
+ for (std::size_t i = 0; i < delays.size(); i++) {
+ const double delay = static_cast<double>(delays[i]);
+ const double micro = delay / 1000.0f;
+ const double mili = micro / 1000.0f;
+ printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
}
-
- const s64 advances = MAX_SLICE_LENGTH / 10;
- core_timing.ResetRun();
- u64 current_time = core_timing.GetTicks();
- bool keep_running{};
- do {
- keep_running = false;
- for (u32 active_core = 0; active_core < 4; ++active_core) {
- core_timing.SwitchContext(active_core);
- if (core_timing.CanCurrentContextRun()) {
- core_timing.AddTicks(std::min<s64>(advances, core_timing.GetDowncount()));
- core_timing.Advance();
- }
- keep_running |= core_timing.CanCurrentContextRun();
- }
- } while (keep_running);
- u64 current_time_2 = core_timing.GetTicks();
-
- REQUIRE(MAX_CALLBACKS == callbacks_done);
- REQUIRE(current_time_2 == current_time + MAX_SLICE_LENGTH * 4);
}
-TEST_CASE("Core::Timing[PredictableLateness]", "[core]") {
+TEST_CASE("CoreTiming[BasicOrderNoPausing]", "[core]") {
ScopeInit guard;
auto& core_timing = guard.core_timing;
+ std::vector<std::shared_ptr<Core::Timing::EventType>> events{
+ Core::Timing::CreateEvent("callbackA", HostCallbackTemplate<0>),
+ Core::Timing::CreateEvent("callbackB", HostCallbackTemplate<1>),
+ Core::Timing::CreateEvent("callbackC", HostCallbackTemplate<2>),
+ Core::Timing::CreateEvent("callbackD", HostCallbackTemplate<3>),
+ Core::Timing::CreateEvent("callbackE", HostCallbackTemplate<4>),
+ };
+
+ core_timing.SyncPause(true);
+ core_timing.SyncPause(false);
+
+ expected_callback = 0;
+
+ u64 start = core_timing.GetGlobalTimeNs().count();
+ u64 one_micro = 1000U;
+ for (std::size_t i = 0; i < events.size(); i++) {
+ u64 order = calls_order[i];
+ core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
+ }
+ u64 end = core_timing.GetGlobalTimeNs().count();
+ const double scheduling_time = static_cast<double>(end - start);
+ const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
- std::shared_ptr<Core::Timing::EventType> cb_a =
- Core::Timing::CreateEvent("callbackA", CallbackTemplate<0>);
- std::shared_ptr<Core::Timing::EventType> cb_b =
- Core::Timing::CreateEvent("callbackB", CallbackTemplate<1>);
+ while (core_timing.HasPendingEvents())
+ ;
- // Enter slice 0
- core_timing.ResetRun();
+ REQUIRE(callbacks_ran_flags.all());
- core_timing.ScheduleEvent(100, cb_a, CB_IDS[0]);
- core_timing.ScheduleEvent(200, cb_b, CB_IDS[1]);
+ for (std::size_t i = 0; i < delays.size(); i++) {
+ const double delay = static_cast<double>(delays[i]);
+ const double micro = delay / 1000.0f;
+ const double mili = micro / 1000.0f;
+ printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
+ }
- AdvanceAndCheck(core_timing, 0, 0, 10, -10); // (100 - 10)
- AdvanceAndCheck(core_timing, 1, 1, 50, -50);
+ const double micro = scheduling_time / 1000.0f;
+ const double mili = micro / 1000.0f;
+ printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
+ printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
+ timer_time / 1000000.f);
}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..21c46a567 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,7 +1,10 @@
add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
+ buffer_cache/map_interval.cpp
buffer_cache/map_interval.h
+ compatible_formats.cpp
+ compatible_formats.h
dirty_flags.cpp
dirty_flags.h
dma_pusher.cpp
@@ -24,6 +27,14 @@ add_library(video_core STATIC
engines/shader_bytecode.h
engines/shader_header.h
engines/shader_type.h
+ macro/macro.cpp
+ macro/macro.h
+ macro/macro_hle.cpp
+ macro/macro_hle.h
+ macro/macro_interpreter.cpp
+ macro/macro_interpreter.h
+ macro/macro_jit_x64.cpp
+ macro/macro_jit_x64.h
fence_manager.h
gpu.cpp
gpu.h
@@ -35,8 +46,6 @@ add_library(video_core STATIC
gpu_thread.h
guest_driver.cpp
guest_driver.h
- macro_interpreter.cpp
- macro_interpreter.h
memory_manager.cpp
memory_manager.h
morton.cpp
@@ -44,11 +53,11 @@ add_library(video_core STATIC
query_cache.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
- rasterizer_cache.cpp
- rasterizer_cache.h
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_opengl/gl_arb_decompiler.cpp
+ renderer_opengl/gl_arb_decompiler.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_device.cpp
@@ -88,6 +97,7 @@ add_library(video_core STATIC
renderer_opengl/utils.h
sampler_cache.cpp
sampler_cache.h
+ shader_cache.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
@@ -228,7 +238,7 @@ endif()
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad xbyak)
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
class BufferBlock {
public:
- bool Overlaps(const VAddr start, const VAddr end) const {
+ bool Overlaps(VAddr start, VAddr end) const {
return (cpu_addr < end) && (cpu_addr_end > start);
}
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
+ bool IsInside(VAddr other_start, VAddr other_end) const {
return cpu_addr <= other_start && other_end <= cpu_addr_end;
}
- std::size_t GetOffset(const VAddr in_addr) {
+ std::size_t Offset(VAddr in_addr) const {
return static_cast<std::size_t>(in_addr - cpu_addr);
}
- VAddr GetCpuAddr() const {
+ VAddr CpuAddr() const {
return cpu_addr;
}
- VAddr GetCpuAddrEnd() const {
+ VAddr CpuAddrEnd() const {
return cpu_addr_end;
}
- void SetCpuAddr(const VAddr new_addr) {
+ void SetCpuAddr(VAddr new_addr) {
cpu_addr = new_addr;
cpu_addr_end = new_addr + size;
}
- std::size_t GetSize() const {
+ std::size_t Size() const {
return size;
}
- void SetEpoch(u64 new_epoch) {
- epoch = new_epoch;
+ u64 Epoch() const {
+ return epoch;
}
- u64 GetEpoch() {
- return epoch;
+ void SetEpoch(u64 new_epoch) {
+ epoch = new_epoch;
}
protected:
- explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
- SetCpuAddr(cpu_addr);
+ explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+ SetCpuAddr(cpu_addr_);
}
- ~BufferBlock() = default;
private:
VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 56e570994..cf8bdd021 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,11 +12,12 @@
#include <utility>
#include <vector>
-#include <boost/icl/interval_map.hpp>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_set.hpp>
-#include <boost/range/iterator_range.hpp>
+#include <boost/intrusive/set.hpp>
#include "common/alignment.h"
+#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/core.h"
@@ -29,102 +30,124 @@
namespace VideoCommon {
-using MapInterval = std::shared_ptr<MapIntervalBase>;
-
-template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
+template <typename Buffer, typename BufferType, typename StreamBuffer>
class BufferCache {
+ using IntervalSet = boost::icl::interval_set<VAddr>;
+ using IntervalType = typename IntervalSet::interval_type;
+ using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+
+ static constexpr u64 WRITE_PAGE_BIT = 11;
+ static constexpr u64 BLOCK_PAGE_BITS = 21;
+ static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
+
public:
- using BufferInfo = std::pair<BufferType, u64>;
+ struct BufferInfo {
+ BufferType handle;
+ u64 offset;
+ u64 address;
+ };
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- const std::optional<VAddr> cpu_addr_opt =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
+ auto& memory_manager = system.GPU().MemoryManager();
+ const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
if (!cpu_addr_opt) {
- return {GetEmptyBuffer(size), 0};
+ return GetEmptyBuffer(size);
}
-
- VAddr cpu_addr = *cpu_addr_opt;
+ const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- auto& memory_manager = system.GPU().MemoryManager();
+ const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return ConstBufferUpload(host_ptr, size);
+ u8* dest;
+ if (is_granular) {
+ dest = memory_manager.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return ConstBufferUpload(staging_buffer.data(), size);
+ dest = staging_buffer.data();
+ memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
}
+ return ConstBufferUpload(dest, size);
+ }
+ if (is_granular) {
+ u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+ return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
+ std::memcpy(dest, host_ptr, size);
+ });
} else {
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- return StreamBufferUpload(host_ptr, size, alignment);
- } else {
- staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- return StreamBufferUpload(staging_buffer.data(), size, alignment);
- }
+ return StreamBufferUpload(
+ size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
+ memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
- auto block = GetBlock(cpu_addr, size);
- auto map = MapAddress(block, gpu_addr, cpu_addr, size);
+ Buffer* const block = GetBlock(cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+ if (!map) {
+ return GetEmptyBuffer(size);
+ }
if (is_written) {
map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
MarkForAsyncFlush(map);
}
- if (!map->IsWritten()) {
- map->MarkAsWritten(true);
- MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ if (!map->is_written) {
+ map->is_written = true;
+ MarkRegionAsWritten(map->start, map->end - 1);
}
}
- return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
+ return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
std::size_t alignment = 4) {
std::lock_guard lock{mutex};
- return StreamBufferUpload(raw_pointer, size, alignment);
+ return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
+ std::memcpy(dest, raw_pointer, size);
+ });
}
- void Map(std::size_t max_size) {
+ /// Prepares the buffer cache for data uploading
+ /// @param max_size Maximum number of bytes that will be uploaded
+ /// @return True when a stream buffer invalidation was required, false otherwise
+ bool Map(std::size_t max_size) {
std::lock_guard lock{mutex};
+ bool invalidated;
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
buffer_offset = buffer_offset_base;
+
+ return invalidated;
}
- /// Finishes the upload stream, returns true on bindings invalidation.
- bool Unmap() {
+ /// Finishes the upload stream
+ void Unmap() {
std::lock_guard lock{mutex};
-
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
- return std::exchange(invalidated, false);
}
+ /// Function called at the end of each frame, intended for deferred operations
void TickFrame() {
++epoch;
+
while (!pending_destruction.empty()) {
// Delay at least 4 frames before destruction.
// This is due to triple buffering happening on some drivers.
static constexpr u64 epochs_to_destroy = 5;
- if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) {
+ if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
break;
}
- pending_destruction.pop_front();
+ pending_destruction.pop();
}
}
@@ -132,12 +155,11 @@ public:
void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
- return a->GetModificationTick() < b->GetModificationTick();
- });
- for (auto& object : objects) {
- if (object->IsModified() && object->IsRegistered()) {
+ VectorMapInterval objects = GetMapsInRange(addr, size);
+ std::sort(objects.begin(), objects.end(),
+ [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
+ for (MapInterval* object : objects) {
+ if (object->is_modified && object->is_registered) {
mutex.unlock();
FlushMap(object);
mutex.lock();
@@ -148,9 +170,9 @@ public:
bool MustFlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
- return map->IsModified() && map->IsRegistered();
+ const VectorMapInterval objects = GetMapsInRange(addr, size);
+ return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
+ return map->is_modified && map->is_registered;
});
}
@@ -158,9 +180,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex};
- std::vector<MapInterval> objects = GetMapsInRange(addr, size);
- for (auto& object : objects) {
- if (object->IsRegistered()) {
+ for (auto& object : GetMapsInRange(addr, size)) {
+ if (object->is_registered) {
Unregister(object);
}
}
@@ -169,10 +190,10 @@ public:
void OnCPUWrite(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex};
- for (const auto& object : GetMapsInRange(addr, size)) {
- if (object->IsMemoryMarked() && object->IsRegistered()) {
+ for (MapInterval* object : GetMapsInRange(addr, size)) {
+ if (object->is_memory_marked && object->is_registered) {
UnmarkMemory(object);
- object->SetSyncPending(true);
+ object->is_sync_pending = true;
marked_for_unregister.emplace_back(object);
}
}
@@ -181,9 +202,9 @@ public:
void SyncGuestHost() {
std::lock_guard lock{mutex};
- for (const auto& object : marked_for_unregister) {
- if (object->IsRegistered()) {
- object->SetSyncPending(false);
+ for (auto& object : marked_for_unregister) {
+ if (object->is_registered) {
+ object->is_sync_pending = false;
Unregister(object);
}
}
@@ -192,9 +213,9 @@ public:
void CommitAsyncFlushes() {
if (uncommitted_flushes) {
- auto commit_list = std::make_shared<std::list<MapInterval>>();
- for (auto& map : *uncommitted_flushes) {
- if (map->IsRegistered() && map->IsModified()) {
+ auto commit_list = std::make_shared<std::list<MapInterval*>>();
+ for (MapInterval* map : *uncommitted_flushes) {
+ if (map->is_registered && map->is_modified) {
// TODO(Blinkhawk): Implement backend asynchronous flushing
// AsyncFlushMap(map)
commit_list->push_back(map);
@@ -228,8 +249,8 @@ public:
committed_flushes.pop_front();
return;
}
- for (MapInterval& map : *flush_list) {
- if (map->IsRegistered()) {
+ for (MapInterval* map : *flush_list) {
+ if (map->is_registered) {
// TODO(Blinkhawk): Replace this for reading the asynchronous flush
FlushMap(map);
}
@@ -237,104 +258,90 @@ public:
committed_flushes.pop_front();
}
- virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
+ virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
- stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+ : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
~BufferCache() = default;
- virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
-
- virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
-
- virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) = 0;
-
- virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
- u8* data) = 0;
-
- virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) = 0;
+ virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
return {};
}
/// Register an object into the cache
- void Register(const MapInterval& new_map, bool inherit_written = false) {
- const VAddr cpu_addr = new_map->GetStart();
+ MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
+ const VAddr cpu_addr = new_map.start;
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
- new_map->GetGpuAddress());
- return;
+ new_map.gpu_addr);
+ return nullptr;
}
- const std::size_t size = new_map->GetEnd() - new_map->GetStart();
- new_map->MarkAsRegistered(true);
- const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
- mapped_addresses.insert({interval, new_map});
+ const std::size_t size = new_map.end - new_map.start;
+ new_map.is_registered = true;
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
- new_map->SetMemoryMarked(true);
+ new_map.is_memory_marked = true;
if (inherit_written) {
- MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
- new_map->MarkAsWritten(true);
+ MarkRegionAsWritten(new_map.start, new_map.end - 1);
+ new_map.is_written = true;
}
+ MapInterval* const storage = mapped_addresses_allocator.Allocate();
+ *storage = new_map;
+ mapped_addresses.insert(*storage);
+ return storage;
}
- void UnmarkMemory(const MapInterval& map) {
- if (!map->IsMemoryMarked()) {
+ void UnmarkMemory(MapInterval* map) {
+ if (!map->is_memory_marked) {
return;
}
- const std::size_t size = map->GetEnd() - map->GetStart();
- rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
- map->SetMemoryMarked(false);
+ const std::size_t size = map->end - map->start;
+ rasterizer.UpdatePagesCachedCount(map->start, size, -1);
+ map->is_memory_marked = false;
}
/// Unregisters an object from the cache
- void Unregister(const MapInterval& map) {
+ void Unregister(MapInterval* map) {
UnmarkMemory(map);
- map->MarkAsRegistered(false);
- if (map->IsSyncPending()) {
+ map->is_registered = false;
+ if (map->is_sync_pending) {
+ map->is_sync_pending = false;
marked_for_unregister.remove(map);
- map->SetSyncPending(false);
}
- if (map->IsWritten()) {
- UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+ if (map->is_written) {
+ UnmarkRegionAsWritten(map->start, map->end - 1);
}
- const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
- mapped_addresses.erase(delete_interval);
+ const auto it = mapped_addresses.find(*map);
+ ASSERT(it != mapped_addresses.end());
+ mapped_addresses.erase(it);
+ mapped_addresses_allocator.Release(map);
}
private:
- MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
- return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
- }
-
- MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
- const std::size_t size) {
- std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
+ MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
+ std::size_t size) {
+ const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
if (memory_manager.IsGranularRange(gpu_addr, size)) {
u8* host_ptr = memory_manager.GetPointer(gpu_addr);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+ block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+ block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
}
- Register(new_map);
- return new_map;
+ return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
}
const VAddr cpu_addr_end = cpu_addr + size;
if (overlaps.size() == 1) {
- MapInterval& current_map = overlaps[0];
+ MapInterval* const current_map = overlaps[0];
if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
return current_map;
}
@@ -344,60 +351,70 @@ private:
bool write_inheritance = false;
bool modified_inheritance = false;
// Calculate new buffer parameters
- for (auto& overlap : overlaps) {
- new_start = std::min(overlap->GetStart(), new_start);
- new_end = std::max(overlap->GetEnd(), new_end);
- write_inheritance |= overlap->IsWritten();
- modified_inheritance |= overlap->IsModified();
+ for (MapInterval* overlap : overlaps) {
+ new_start = std::min(overlap->start, new_start);
+ new_end = std::max(overlap->end, new_end);
+ write_inheritance |= overlap->is_written;
+ modified_inheritance |= overlap->is_modified;
}
GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
for (auto& overlap : overlaps) {
Unregister(overlap);
}
UpdateBlock(block, new_start, new_end, overlaps);
- MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+
+ const MapInterval new_map{new_start, new_end, new_gpu_addr};
+ MapInterval* const map = Register(new_map, write_inheritance);
+ if (!map) {
+ return nullptr;
+ }
if (modified_inheritance) {
- new_map->MarkAsModified(true, GetModifiedTicks());
+ map->MarkAsModified(true, GetModifiedTicks());
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
- MarkForAsyncFlush(new_map);
+ MarkForAsyncFlush(map);
}
}
- Register(new_map, write_inheritance);
- return new_map;
+ return map;
}
- void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
- std::vector<MapInterval>& overlaps) {
+ void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
+ const VectorMapInterval& overlaps) {
const IntervalType base_interval{start, end};
IntervalSet interval_set{};
interval_set.add(base_interval);
for (auto& overlap : overlaps) {
- const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+ const IntervalType subtract{overlap->start, overlap->end};
interval_set.subtract(subtract);
}
for (auto& interval : interval_set) {
- std::size_t size = interval.upper() - interval.lower();
- if (size > 0) {
- staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
- UploadBlockData(block, block->GetOffset(interval.lower()), size,
- staging_buffer.data());
+ const std::size_t size = interval.upper() - interval.lower();
+ if (size == 0) {
+ continue;
}
+ staging_buffer.resize(size);
+ system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+ block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
}
}
- std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
+ VectorMapInterval result;
if (size == 0) {
- return {};
+ return result;
}
- std::vector<MapInterval> objects{};
- const IntervalType interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
- objects.push_back(pair.second);
+ const VAddr addr_end = addr + size;
+ auto it = mapped_addresses.lower_bound(addr);
+ if (it != mapped_addresses.begin()) {
+ --it;
}
-
- return objects;
+ while (it != mapped_addresses.end() && it->start < addr_end) {
+ if (it->Overlaps(addr, addr_end)) {
+ result.push_back(&*it);
+ }
+ ++it;
+ }
+ return result;
}
/// Returns a ticks counter used for tracking when cached objects were last modified
@@ -405,24 +422,28 @@ private:
return ++modified_ticks;
}
- void FlushMap(MapInterval map) {
- std::size_t size = map->GetEnd() - map->GetStart();
- OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
+ void FlushMap(MapInterval* map) {
+ const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
+ ASSERT_OR_EXECUTE(it != blocks.end(), return;);
+
+ std::shared_ptr<Buffer> block = it->second;
+
+ const std::size_t size = map->end - map->start;
staging_buffer.resize(size);
- DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
+ block->Download(block->Offset(map->start), size, staging_buffer.data());
+ system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
- BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
- std::size_t alignment) {
+ template <typename Callable>
+ BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
AlignBuffer(alignment);
const std::size_t uploaded_offset = buffer_offset;
- std::memcpy(buffer_ptr, raw_pointer, size);
+ callable(buffer_ptr);
buffer_ptr += size;
buffer_offset += size;
- return {stream_buffer_handle, uploaded_offset};
+ return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
}
void AlignBuffer(std::size_t alignment) {
@@ -432,97 +453,89 @@ private:
buffer_offset = offset_aligned;
}
- OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
- const std::size_t old_size = buffer->GetSize();
- const std::size_t new_size = old_size + block_page_size;
- const VAddr cpu_addr = buffer->GetCpuAddr();
- OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
- CopyBlock(buffer, new_buffer, 0, 0, old_size);
- buffer->SetEpoch(epoch);
- pending_destruction.push_back(buffer);
+ std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
+ const std::size_t old_size = buffer->Size();
+ const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
+ const VAddr cpu_addr = buffer->CpuAddr();
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
+ new_buffer->CopyFrom(*buffer, 0, 0, old_size);
+ QueueDestruction(std::move(buffer));
+
const VAddr cpu_addr_end = cpu_addr + new_size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
+
return new_buffer;
}
- OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
- const std::size_t size_1 = first->GetSize();
- const std::size_t size_2 = second->GetSize();
- const VAddr first_addr = first->GetCpuAddr();
- const VAddr second_addr = second->GetCpuAddr();
+ std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
+ std::shared_ptr<Buffer> second) {
+ const std::size_t size_1 = first->Size();
+ const std::size_t size_2 = second->Size();
+ const VAddr first_addr = first->CpuAddr();
+ const VAddr second_addr = second->CpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2;
- OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
- CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
- CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
- first->SetEpoch(epoch);
- second->SetEpoch(epoch);
- pending_destruction.push_back(first);
- pending_destruction.push_back(second);
+
+ std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
+ new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
+ new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
+ QueueDestruction(std::move(first));
+ QueueDestruction(std::move(second));
+
const VAddr cpu_addr_end = new_addr + new_size - 1;
- u64 page_start = new_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
- blocks[page_start] = new_buffer;
- ++page_start;
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
+ blocks.insert_or_assign(page_start, new_buffer);
}
return new_buffer;
}
- OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
- OwnerBuffer found;
+ Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
+ std::shared_ptr<Buffer> found;
+
const VAddr cpu_addr_end = cpu_addr + size - 1;
- u64 page_start = cpu_addr >> block_page_bits;
- const u64 page_end = cpu_addr_end >> block_page_bits;
- while (page_start <= page_end) {
+ const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
+ for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
auto it = blocks.find(page_start);
if (it == blocks.end()) {
if (found) {
found = EnlargeBlock(found);
- } else {
- const VAddr start_addr = (page_start << block_page_bits);
- found = CreateBlock(start_addr, block_page_size);
- blocks[page_start] = found;
- }
- } else {
- if (found) {
- if (found == it->second) {
- ++page_start;
- continue;
- }
- found = MergeBlocks(found, it->second);
- } else {
- found = it->second;
+ continue;
}
+ const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
+ found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
+ blocks.insert_or_assign(page_start, found);
+ continue;
+ }
+ if (!found) {
+ found = it->second;
+ continue;
+ }
+ if (found != it->second) {
+ found = MergeBlocks(std::move(found), it->second);
}
- ++page_start;
}
- return found;
+ return found.get();
}
- void MarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ void MarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
auto it = written_pages.find(page_start);
if (it != written_pages.end()) {
it->second = it->second + 1;
} else {
- written_pages[page_start] = 1;
+ written_pages.insert_or_assign(page_start, 1);
}
- page_start++;
}
}
- void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ void UnmarkRegionAsWritten(VAddr start, VAddr end) {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
auto it = written_pages.find(page_start);
if (it != written_pages.end()) {
if (it->second > 1) {
@@ -531,25 +544,27 @@ private:
written_pages.erase(it);
}
}
- page_start++;
}
}
- bool IsRegionWritten(const VAddr start, const VAddr end) const {
- u64 page_start = start >> write_page_bit;
- const u64 page_end = end >> write_page_bit;
- while (page_start <= page_end) {
+ bool IsRegionWritten(VAddr start, VAddr end) const {
+ const u64 page_end = end >> WRITE_PAGE_BIT;
+ for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
if (written_pages.count(page_start) > 0) {
return true;
}
- page_start++;
}
return false;
}
- void MarkForAsyncFlush(MapInterval& map) {
+ void QueueDestruction(std::shared_ptr<Buffer> buffer) {
+ buffer->SetEpoch(epoch);
+ pending_destruction.push(std::move(buffer));
+ }
+
+ void MarkForAsyncFlush(MapInterval* map) {
if (!uncommitted_flushes) {
- uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
+ uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
}
uncommitted_flushes->insert(map);
}
@@ -558,35 +573,29 @@ private:
Core::System& system;
std::unique_ptr<StreamBuffer> stream_buffer;
- BufferType stream_buffer_handle{};
-
- bool invalidated = false;
+ BufferType stream_buffer_handle;
u8* buffer_ptr = nullptr;
u64 buffer_offset = 0;
u64 buffer_offset_base = 0;
- using IntervalSet = boost::icl::interval_set<VAddr>;
- using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
- using IntervalType = typename IntervalCache::interval_type;
- IntervalCache mapped_addresses;
+ MapIntervalAllocator mapped_addresses_allocator;
+ boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
+ mapped_addresses;
- static constexpr u64 write_page_bit = 11;
std::unordered_map<u64, u32> written_pages;
+ std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
- static constexpr u64 block_page_bits = 21;
- static constexpr u64 block_page_size = 1ULL << block_page_bits;
- std::unordered_map<u64, OwnerBuffer> blocks;
-
- std::list<OwnerBuffer> pending_destruction;
+ std::queue<std::shared_ptr<Buffer>> pending_destruction;
u64 epoch = 0;
u64 modified_ticks = 0;
std::vector<u8> staging_buffer;
- std::list<MapInterval> marked_for_unregister;
- std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
- std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
+ std::list<MapInterval*> marked_for_unregister;
+
+ std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
+ std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
std::recursive_mutex mutex;
};
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,39 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "video_core/buffer_cache/map_interval.h"
+
+namespace VideoCommon {
+
+/// Seeds the free list with every slot of the chunk embedded in the allocator.
+MapIntervalAllocator::MapIntervalAllocator() {
+    FillFreeList(first_chunk);
+}
+
+MapIntervalAllocator::~MapIntervalAllocator() = default;
+
+/// Appends a freshly allocated chunk to the chunk list and exposes its slots.
+void MapIntervalAllocator::AllocateNewChunk() {
+    std::unique_ptr<Chunk>& tail = *new_chunk;
+    tail = std::make_unique<Chunk>();
+    FillFreeList(*tail);
+    // The next chunk will be linked through the one that was just created.
+    new_chunk = &tail->next;
+}
+
+/// Pushes the address of every slot in `chunk` onto the free list.
+/// Slots are appended back to front, so the chunk's first slot ends up last in the list.
+void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
+    free_list.reserve(free_list.size() + chunk.data.size());
+    for (auto slot = chunk.data.rbegin(); slot != chunk.data.rend(); ++slot) {
+        free_list.push_back(&*slot);
+    }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 29d8b26f3..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,104 +4,89 @@
#pragma once
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include <boost/intrusive/set_hook.hpp>
+
#include "common/common_types.h"
#include "video_core/gpu.h"
namespace VideoCommon {
-class MapIntervalBase {
-public:
- MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
- : start{start}, end{end}, gpu_addr{gpu_addr} {}
-
- void SetCpuAddress(VAddr new_cpu_addr) {
- cpu_addr = new_cpu_addr;
- }
-
- VAddr GetCpuAddress() const {
- return cpu_addr;
- }
-
- GPUVAddr GetGpuAddress() const {
- return gpu_addr;
- }
-
- bool IsInside(const VAddr other_start, const VAddr other_end) const {
- return (start <= other_start && other_end <= end);
- }
-
- bool operator==(const MapIntervalBase& rhs) const {
- return std::tie(start, end) == std::tie(rhs.start, rhs.end);
- }
-
- bool operator!=(const MapIntervalBase& rhs) const {
- return !operator==(rhs);
- }
+struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
+ MapInterval() = default;
- void MarkAsRegistered(const bool registered) {
- is_registered = registered;
- }
+ /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
- bool IsRegistered() const {
- return is_registered;
- }
+ explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
+ : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
- void SetMemoryMarked(bool is_memory_marked_) {
- is_memory_marked = is_memory_marked_;
+ bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
+ return start <= other_start && other_end <= end;
}
- bool IsMemoryMarked() const {
- return is_memory_marked;
+ bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
+ return start < other_end && other_start < end;
}
- void SetSyncPending(bool is_sync_pending_) {
- is_sync_pending = is_sync_pending_;
- }
+ void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
+ is_modified = is_modified_;
+ ticks = ticks_;
+ }
+
+ boost::intrusive::set_member_hook<> member_hook_;
+ VAddr start = 0;
+ VAddr end = 0;
+ GPUVAddr gpu_addr = 0;
+ u64 ticks = 0;
+ bool is_written = false;
+ bool is_modified = false;
+ bool is_registered = false;
+ bool is_memory_marked = false;
+ bool is_sync_pending = false;
+};
- bool IsSyncPending() const {
- return is_sync_pending;
+struct MapIntervalCompare {
+ constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
+ return lhs.start < rhs.start;
}
+};
- VAddr GetStart() const {
- return start;
- }
+class MapIntervalAllocator {
+public:
+ MapIntervalAllocator();
+ ~MapIntervalAllocator();
- VAddr GetEnd() const {
- return end;
+ MapInterval* Allocate() {
+ if (free_list.empty()) {
+ AllocateNewChunk();
+ }
+ MapInterval* const interval = free_list.back();
+ free_list.pop_back();
+ return interval;
}
- void MarkAsModified(const bool is_modified_, const u64 tick) {
- is_modified = is_modified_;
- ticks = tick;
+ void Release(MapInterval* interval) {
+ free_list.push_back(interval);
}
- bool IsModified() const {
- return is_modified;
- }
+private:
+ struct Chunk {
+ std::unique_ptr<Chunk> next;
+ std::array<MapInterval, 0x8000> data;
+ };
- u64 GetModificationTick() const {
- return ticks;
- }
+ void AllocateNewChunk();
- void MarkAsWritten(const bool is_written_) {
- is_written = is_written_;
- }
+ void FillFreeList(Chunk& chunk);
- bool IsWritten() const {
- return is_written;
- }
+ std::vector<MapInterval*> free_list;
+ std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
-private:
- VAddr start;
- VAddr end;
- GPUVAddr gpu_addr;
- VAddr cpu_addr{};
- bool is_written{};
- bool is_modified{};
- bool is_registered{};
- bool is_memory_marked{};
- bool is_sync_pending{};
- u64 ticks{};
+ Chunk first_chunk;
};
} // namespace VideoCommon
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
new file mode 100644
index 000000000..6c426b035
--- /dev/null
+++ b/src/video_core/compatible_formats.cpp
@@ -0,0 +1,168 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/compatible_formats.h"
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+namespace {
+
+// Compatibility table taken from Table 3.X.2 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
+
+constexpr std::array VIEW_CLASS_128_BITS = {
+    PixelFormat::RGBA32F,
+    PixelFormat::RGBA32UI,
+};
+// Missing formats:
+// PixelFormat::RGBA32I
+
+constexpr std::array VIEW_CLASS_96_BITS = {
+    PixelFormat::RGB32F,
+};
+// Missing formats:
+// PixelFormat::RGB32UI,
+// PixelFormat::RGB32I,
+
+constexpr std::array VIEW_CLASS_64_BITS = {
+    PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI,
+    PixelFormat::RG32UI, PixelFormat::RGBA16U, PixelFormat::RGBA16S,
+};
+// Missing formats:
+// PixelFormat::RGBA16I
+// PixelFormat::RG32I
+
+// TODO: How should we handle 48 bits?
+
+constexpr std::array VIEW_CLASS_32_BITS = {
+    PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F,
+    PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI,
+    PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U,
+    PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S,
+    PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8,
+    PixelFormat::BGRA8_SRGB,
+};
+// Missing formats:
+// PixelFormat::RGBA8UI
+// PixelFormat::RGBA8I
+// PixelFormat::RGB10_A2_UI
+
+// TODO: How should we handle 24 bits?
+
+constexpr std::array VIEW_CLASS_16_BITS = {
+    PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
+    PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S,
+};
+// Missing formats:
+// PixelFormat::RG8I
+
+constexpr std::array VIEW_CLASS_8_BITS = {
+    PixelFormat::R8UI,
+    PixelFormat::R8U,
+};
+// Missing formats:
+// PixelFormat::R8I
+// PixelFormat::R8S
+
+constexpr std::array VIEW_CLASS_RGTC1_RED = {
+    PixelFormat::DXN1,
+};
+// Missing formats:
+// COMPRESSED_SIGNED_RED_RGTC1
+
+constexpr std::array VIEW_CLASS_RGTC2_RG = {
+    PixelFormat::DXN2UNORM,
+    PixelFormat::DXN2SNORM,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+    PixelFormat::BC7U,
+    PixelFormat::BC7U_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+    PixelFormat::BC6H_SF16,
+    PixelFormat::BC6H_UF16,
+};
+
+// Compatibility table taken from Table 4.X.1 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
+
+constexpr std::array COPY_CLASS_128_BITS = {
+    PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23,
+    PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB,
+    PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB,
+    PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16,
+};
+// Missing formats:
+// PixelFormat::RGBA32I
+// COMPRESSED_RG_RGTC2
+// NOTE(review): VIEW_CLASS_RGTC2_RG above pairs PixelFormat::DXN2UNORM with DXN2SNORM;
+// confirm whether DXN2UNORM is the COMPRESSED_RG_RGTC2 entry and should be listed here.
+
+constexpr std::array COPY_CLASS_64_BITS = {
+    PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
+    PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
+};
+// Missing formats:
+// PixelFormat::RGBA16I
+// PixelFormat::RG32I,
+// COMPRESSED_RGB_S3TC_DXT1_EXT
+// COMPRESSED_SRGB_S3TC_DXT1_EXT
+// COMPRESSED_RGBA_S3TC_DXT1_EXT
+// COMPRESSED_SIGNED_RED_RGTC1
+
+/// Marks format_a and format_b (given as table indices) as compatible in both directions.
+void Enable(FormatCompatibility::Table& compatibility, size_t format_a, size_t format_b) {
+    compatibility[format_a][format_b] = true;
+    compatibility[format_b][format_a] = true;
+}
+
+/// Overload taking pixel formats; forwards their integer values as table indices.
+void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
+    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+}
+
+/// Marks every pair of formats in range (each format with itself included) as compatible.
+template <typename Range>
+void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+    for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
+        for (auto it_b = it_a; it_b != range.end(); ++it_b) {
+            Enable(compatibility, *it_a, *it_b);
+        }
+    }
+}
+
+} // Anonymous namespace
+
+/// Builds the view table from the view classes, then derives the copy table from it by
+/// enabling the copy classes on top of a copy of the view table.
+FormatCompatibility::FormatCompatibility() {
+    for (size_t i = 0; i < MaxPixelFormat; ++i) {
+        // Identity is allowed
+        Enable(view, i, i);
+    }
+
+    EnableRange(view, VIEW_CLASS_128_BITS);
+    EnableRange(view, VIEW_CLASS_96_BITS);
+    EnableRange(view, VIEW_CLASS_64_BITS);
+    EnableRange(view, VIEW_CLASS_32_BITS);
+    EnableRange(view, VIEW_CLASS_16_BITS);
+    EnableRange(view, VIEW_CLASS_8_BITS);
+    EnableRange(view, VIEW_CLASS_RGTC1_RED);
+    EnableRange(view, VIEW_CLASS_RGTC2_RG);
+    EnableRange(view, VIEW_CLASS_BPTC_UNORM);
+    EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+
+    copy = view;
+    EnableRange(copy, COPY_CLASS_128_BITS);
+    EnableRange(copy, COPY_CLASS_64_BITS);
+}
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
new file mode 100644
index 000000000..d1082566d
--- /dev/null
+++ b/src/video_core/compatible_formats.h
@@ -0,0 +1,38 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+/// Lookup tables telling which pairs of pixel formats are enabled for views and for copies.
+class FormatCompatibility {
+public:
+    /// Square boolean matrix indexed by the integer values of two pixel formats.
+    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
+
+    explicit FormatCompatibility();
+
+    /// Returns the view table entry for the given pair of formats.
+    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
+        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+    }
+
+    /// Returns the copy table entry for the given pair of formats.
+    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
+        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+    }
+
+private:
+    Table view;
+    Table copy;
+};
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bdc023d54..f2f96ac33 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -54,9 +54,7 @@ bool DmaPusher::Step() {
return true;
});
const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
- GPUVAddr dma_get = command_list_header.addr;
- GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
- bool non_main = command_list_header.is_non_main;
+ const GPUVAddr dma_get = command_list_header.addr;
if (dma_pushbuffer_subindex >= command_list.size()) {
// We've gone through the current list, remove it from the queue
@@ -133,11 +131,6 @@ bool DmaPusher::Step() {
index++;
}
- if (!non_main) {
- // TODO (degasus): This is dead code, as dma_mget is never read.
- dma_mget = dma_put;
- }
-
return true;
}
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index e8b714e94..efa90d170 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -102,7 +102,6 @@ private:
DmaState dma_state{};
bool dma_increment_once{};
- GPUVAddr dma_mget{}; ///< main pushbuffer last read address
bool ib_enable{true}; ///< IB mode enabled
std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0;
+ virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
ASSERT(stage == ShaderType::Compute);
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 33936e209..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
MemoryManager& memory_manager)
: system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
- macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+ macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
dirty.flags.flip();
-
InitializeRegisterDefaults();
}
@@ -44,6 +43,12 @@ void Maxwell3D::InitializeRegisterDefaults() {
viewport.depth_range_near = 0.0f;
viewport.depth_range_far = 1.0f;
}
+ for (auto& viewport : regs.viewport_transform) {
+ viewport.swizzle.x.Assign(Regs::ViewportSwizzle::PositiveX);
+ viewport.swizzle.y.Assign(Regs::ViewportSwizzle::PositiveY);
+ viewport.swizzle.z.Assign(Regs::ViewportSwizzle::PositiveZ);
+ viewport.swizzle.w.Assign(Regs::ViewportSwizzle::PositiveW);
+ }
// Doom and Bomberman seems to use the uninitialized registers and just enable blend
// so initialize blend registers with sane values
@@ -100,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
regs.rasterize_enable = 1;
regs.rt_separate_frag_data = 1;
regs.framebuffer_srgb = 1;
+ regs.line_width_aliased = 1.0f;
+ regs.line_width_smooth = 1.0f;
regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
+ regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
+ regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
shadow_state = regs;
@@ -110,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
-void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
+void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -119,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
// Execute the current macro.
- macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
+ macro_engine->Execute(*this, macro_positions[entry], parameters);
if (mme_draw.current_mode != MMEDrawMode::Undefined) {
FlushMMEInlineDraw();
}
@@ -155,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
// Call the macro when there are no more parameters in the command buffer
if (is_last_call) {
- CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
+ CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
return;
@@ -191,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
break;
}
case MAXWELL3D_REG_INDEX(macros.data): {
- ProcessMacroUpload(arg);
+ macro_engine->AddCode(regs.macros.upload_address, arg);
break;
}
case MAXWELL3D_REG_INDEX(macros.bind): {
@@ -300,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
// Call the macro when there are no more parameters in the command buffer
if (amount == methods_pending) {
- CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
+ CallMacroMethod(executing_macro, macro_params);
macro_params.clear();
}
return;
@@ -414,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
- ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
- "upload_address exceeded macro_memory size!");
- macro_memory[regs.macros.upload_address++] = data;
+ macro_engine->AddCode(regs.macros.upload_address++, data);
}
void Maxwell3D::ProcessMacroBind(u32 data) {
@@ -451,8 +458,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
- ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
- "Units other than CROP are unimplemented");
+ if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
+ LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
+ }
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
@@ -528,8 +536,8 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer.ResetCounter(QueryType::SamplesPassed);
break;
default:
- LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
- static_cast<int>(regs.counter_reset));
+ LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
+ static_cast<int>(regs.counter_reset));
break;
}
}
@@ -586,8 +594,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
system.GPU().GetTicks());
return {};
default:
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
return 1;
}
}
@@ -732,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
+ return AccessSampler(memory_manager.Read<u32>(tex_info_address));
+}
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
+ const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1a5df05ce..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -23,7 +23,7 @@
#include "video_core/engines/engine_upload.h"
#include "video_core/engines/shader_type.h"
#include "video_core/gpu.h"
-#include "video_core/macro_interpreter.h"
+#include "video_core/macro/macro.h"
#include "video_core/textures/texture.h"
namespace Core {
@@ -576,6 +576,17 @@ public:
Replay = 3,
};
+ /// Values held by the per-component swizzle bit fields (x, y, z, w) of the viewport
+ /// transform registers. Each component has a positive and a negative variant.
+ /// NOTE(review): presumably selects which (optionally negated) input component feeds
+ /// each output component - confirm against hardware documentation.
+ enum class ViewportSwizzle : u32 {
+ PositiveX = 0,
+ NegativeX = 1,
+ PositiveY = 2,
+ NegativeY = 3,
+ PositiveZ = 4,
+ NegativeZ = 5,
+ PositiveW = 6,
+ NegativeW = 7,
+ };
+
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
@@ -587,6 +598,7 @@ public:
BitField<4, 3, u32> block_height;
BitField<8, 3, u32> block_depth;
BitField<12, 1, InvMemoryLayout> type;
+ BitField<16, 1, u32> is_3d;
} memory_layout;
union {
BitField<0, 16, u32> layers;
@@ -619,7 +631,14 @@ public:
f32 translate_x;
f32 translate_y;
f32 translate_z;
- INSERT_UNION_PADDING_WORDS(2);
+ union {
+ u32 raw;
+ BitField<0, 3, ViewportSwizzle> x;
+ BitField<4, 3, ViewportSwizzle> y;
+ BitField<8, 3, ViewportSwizzle> z;
+ BitField<12, 3, ViewportSwizzle> w;
+ } swizzle;
+ INSERT_UNION_PADDING_WORDS(1);
Common::Rectangle<f32> GetRect() const {
return {
@@ -1385,6 +1404,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override;
+ SamplerDescriptor AccessSampler(u32 handle) const override;
+
u32 GetBoundBuffer() const override {
return regs.tex_cb_index;
}
@@ -1393,17 +1414,16 @@ public:
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
- /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
- /// we've seen used.
- using MacroMemory = std::array<u32, 0x40000>;
+ bool ShouldExecute() const {
+ return execute_on;
+ }
- /// Gets a reference to macro memory.
- const MacroMemory& GetMacroMemory() const {
- return macro_memory;
+ VideoCore::RasterizerInterface& GetRasterizer() {
+ return rasterizer;
}
- bool ShouldExecute() const {
- return execute_on;
+ const VideoCore::RasterizerInterface& GetRasterizer() const {
+ return rasterizer;
}
/// Notify a memory write has happened.
@@ -1450,16 +1470,13 @@ private:
std::array<bool, Regs::NUM_REGS> mme_inline{};
- /// Memory for macro code
- MacroMemory macro_memory;
-
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
/// Parameters that have been submitted to the macro call so far.
std::vector<u32> macro_params;
/// Interpreter for the macro codes uploaded to the GPU.
- MacroInterpreter macro_interpreter;
+ std::unique_ptr<MacroEngine> macro_engine;
static constexpr u32 null_cb_data = 0xFFFFFFFF;
struct {
@@ -1488,7 +1505,7 @@ private:
* @param num_parameters Number of arguments
* @param parameters Arguments to the method call
*/
- void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
+ void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
/// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8dae754d4..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -168,18 +168,22 @@ enum class Pred : u64 {
};
enum class PredCondition : u64 {
- LessThan = 1,
- Equal = 2,
- LessEqual = 3,
- GreaterThan = 4,
- NotEqual = 5,
- GreaterEqual = 6,
- LessThanWithNan = 9,
- LessEqualWithNan = 11,
- GreaterThanWithNan = 12,
- NotEqualWithNan = 13,
- GreaterEqualWithNan = 14,
- // TODO(Subv): Other condition types
+ F = 0, // Always false
+ LT = 1, // Ordered less than
+ EQ = 2, // Ordered equal
+ LE = 3, // Ordered less than or equal
+ GT = 4, // Ordered greater than
+ NE = 5, // Ordered not equal
+ GE = 6, // Ordered greater than or equal
+ NUM = 7, // Ordered
+ NAN_ = 8, // Unordered
+ LTU = 9, // Unordered less than
+ EQU = 10, // Unordered equal
+ LEU = 11, // Unordered less than or equal
+ GTU = 12, // Unordered greater than
+ NEU = 13, // Unordered not equal
+ GEU = 14, // Unordered greater than or equal
+ T = 15, // Always true
};
enum class PredOperation : u64 {
@@ -657,6 +661,10 @@ union Instruction {
constexpr Instruction(u64 value) : value{value} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
+    /// Returns true when the bit at position `offset` (0 = least significant bit) of the raw
+    /// instruction word is set. `offset` must be below 64.
+    constexpr bool Bit(u64 offset) const {
+        return (value & (u64{1} << offset)) != 0;
+    }
+
BitField<0, 8, Register> gpr0;
BitField<8, 8, Register> gpr8;
union {
@@ -1870,7 +1878,9 @@ public:
HSETP2_C,
HSETP2_R,
HSETP2_IMM,
+ HSET2_C,
HSET2_R,
+ HSET2_IMM,
POPC_C,
POPC_R,
POPC_IMM,
@@ -2190,7 +2200,9 @@ private:
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
+ INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+ INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..482e49711 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
+
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
- const u64 cpu_ticks = system.CoreTiming().GetTicks();
- u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
if (Settings::values.use_fast_gpu_time) {
nanoseconds /= 256;
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index dd51c95b7..2c42483bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -68,6 +68,7 @@ enum class RenderTargetFormat : u32 {
BGR5A1_UNORM = 0xE9,
RG8_UNORM = 0xEA,
RG8_SNORM = 0xEB,
+ RG8_UINT = 0xED,
R16_UNORM = 0xEE,
R16_SNORM = 0xEF,
R16_SINT = 0xF0,
@@ -283,6 +284,12 @@ public:
/// core timing events.
virtual void Start() = 0;
+ /// Obtain the CPU Context
+ virtual void ObtainContext() = 0;
+
+ /// Release the CPU Context
+ virtual void ReleaseContext() = 0;
+
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..7b855f63e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
- cpu_context->MakeCurrent();
gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
}
+/// Makes the CPU-side graphics context current by calling MakeCurrent() on cpu_context.
+/// Start() no longer does this itself, so the context is only obtained through this call.
+void GPUAsynch::ObtainContext() {
+ cpu_context->MakeCurrent();
+}
+
+/// Counterpart of ObtainContext(): releases the CPU-side graphics context by calling
+/// DoneCurrent() on cpu_context.
+void GPUAsynch::ReleaseContext() {
+ cpu_context->DoneCurrent();
+}
+
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..15e9f1d38 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
~GPUAsynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..aaeb9811d 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase
GPUSynch::~GPUSynch() = default;
-void GPUSynch::Start() {
+void GPUSynch::Start() {}
+
+/// Makes the graphics context current by calling MakeCurrent() on `context`.
+/// This call used to live in Start(), which is now an empty function.
+void GPUSynch::ObtainContext() {
context->MakeCurrent();
}
+/// Counterpart of ObtainContext(): releases the graphics context by calling DoneCurrent()
+/// on `context`.
+void GPUSynch::ReleaseContext() {
+ context->DoneCurrent();
+}
+
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..762c20aa5 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
~GPUSynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..738c6f0c1 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
- MicroProfileOnThreadCreate("GpuThread");
+ std::string name = "yuzu:GPU";
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ system.RegisterHostThread();
// Wait for first GPU command before acquiring the window context
while (state.queue.Empty())
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
new file mode 100644
index 000000000..a50e7b4e0
--- /dev/null
+++ b/src/video_core/macro/macro.cpp
@@ -0,0 +1,91 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+#include <boost/container_hash/hash.hpp>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro.h"
+#include "video_core/macro/macro_hle.h"
+#include "video_core/macro/macro_interpreter.h"
+#include "video_core/macro/macro_jit_x64.h"
+
+namespace Tegra {
+
+/// Creates the engine together with the HLEMacro helper that Execute() later queries for
+/// HLE replacements; the helper is constructed with the same Maxwell3D reference.
+MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
+ : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
+
+// Defined in this translation unit, where HLEMacro is a complete type (macro_hle.h is
+// included above); macro.h only forward-declares HLEMacro for the unique_ptr member.
+MacroEngine::~MacroEngine() = default;
+
+/// Appends one uploaded code word to the buffer stored under `method`. The buffer is created
+/// empty the first time a given key is seen.
+void MacroEngine::AddCode(u32 method, u32 data) {
+    std::vector<u32>& code = uploaded_macro_code[method];
+    code.push_back(data);
+}
+
+/// Runs the macro registered under `method`, compiling it on first use.
+///
+/// Lookup order:
+///  1. macro_cache: the macro was compiled before, run it directly.
+///  2. uploaded_macro_code[method]: code was uploaded under exactly this key, compile it.
+///  3. Any uploaded buffer that contains `method` as an offset from its key: the tail of that
+///     buffer starting at the offset is stored under `method` and compiled.
+/// If none applies the macro was never uploaded; this is reported through UNREACHABLE_MSG and
+/// nothing is executed.
+///
+/// After compiling, the hash of the code is offered to the HLE table; when a replacement
+/// exists it is cached and used instead of the compiled program from then on.
+///
+/// @param maxwell3d  Currently unused here; kept so the interface stays unchanged.
+/// @param method     Key identifying the macro to run.
+/// @param parameters Arguments forwarded to the macro.
+void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
+                          const std::vector<u32>& parameters) {
+    // Single dispatch point: prefer the HLE replacement when one was matched.
+    const auto run = [&parameters, method](const CacheInfo& info) {
+        const auto& program = info.has_hle_program ? info.hle_program : info.lle_program;
+        program->Execute(parameters, method);
+    };
+
+    if (const auto cached = macro_cache.find(method); cached != macro_cache.end()) {
+        run(cached->second);
+        return;
+    }
+
+    // Macro not compiled, check if it's uploaded and if so, compile it
+    auto macro_code = uploaded_macro_code.find(method);
+    if (macro_code == uploaded_macro_code.end()) {
+        // No upload under this exact key: look for an upload that contains it.
+        std::optional<u32> mid_method;
+        for (const auto& [method_base, code] : uploaded_macro_code) {
+            if (method >= method_base && (method - method_base) < code.size()) {
+                mid_method = method_base;
+                break;
+            }
+        }
+        if (!mid_method.has_value()) {
+            UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
+            return;
+        }
+
+        // Copy the tail of the containing upload before touching the map, then register it
+        // under `method` so the next lookup for this key is a direct hit.
+        const std::vector<u32>& containing = uploaded_macro_code.at(*mid_method);
+        std::vector<u32> tail(containing.begin() + (method - *mid_method), containing.end());
+        macro_code = uploaded_macro_code.emplace(method, std::move(tail)).first;
+    }
+
+    const std::vector<u32>& code = macro_code->second;
+    CacheInfo& cache_info = macro_cache[method];
+    cache_info.hash = boost::hash_value(code);
+    cache_info.lle_program = Compile(code);
+
+    auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
+    if (hle_program.has_value()) {
+        cache_info.has_hle_program = true;
+        cache_info.hle_program = std::move(*hle_program);
+    }
+    run(cache_info);
+}
+
+/// Creates the macro backend for `maxwell3d`: the x86-64 JIT when this build targets x86-64
+/// and the JIT is not disabled in the settings, otherwise the interpreter.
+std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
+#ifdef ARCHITECTURE_x86_64
+    if (!Settings::values.disable_macro_jit) {
+        return std::make_unique<MacroJITx64>(maxwell3d);
+    }
+#endif
+    return std::make_unique<MacroInterpreter>(maxwell3d);
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
new file mode 100644
index 000000000..4d00b84b0
--- /dev/null
+++ b/src/video_core/macro/macro.h
@@ -0,0 +1,141 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+/// Instruction encoding of the macro code, shared by the macro backends.
+namespace Macro {
+/// Number of general purpose registers available to a macro.
+constexpr std::size_t NUM_MACRO_REGISTERS = 8;
+
+/// Top-level operation, stored in bits 0-2 of an opcode.
+enum class Operation : u32 {
+    ALU = 0,
+    AddImmediate = 1,
+    ExtractInsert = 2,
+    ExtractShiftLeftImmediate = 3,
+    ExtractShiftLeftRegister = 4,
+    Read = 5,
+    Unused = 6, // This operation doesn't seem to be a valid encoding.
+    Branch = 7,
+};
+
+/// Sub-operation of Operation::ALU, stored in bits 17-21 of an opcode.
+enum class ALUOperation : u32 {
+    Add = 0,
+    AddWithCarry = 1,
+    Subtract = 2,
+    SubtractWithBorrow = 3,
+    // Operations 4-7 don't seem to be valid encodings.
+    Xor = 8,
+    Or = 9,
+    And = 10,
+    AndNot = 11,
+    Nand = 12
+};
+
+/// Result handling selector, stored in bits 4-6 of an opcode.
+enum class ResultOperation : u32 {
+    IgnoreAndFetch = 0,
+    Move = 1,
+    MoveAndSetMethod = 2,
+    FetchAndSend = 3,
+    MoveAndSend = 4,
+    FetchAndSetMethod = 5,
+    MoveAndSetMethodFetchAndSend = 6,
+    MoveAndSetMethodSend = 7
+};
+
+/// Condition of a branch, stored in bit 4 of an opcode (overlaps result_operation).
+enum class BranchCondition : u32 {
+    Zero = 0,
+    NotZero = 1,
+};
+
+/// One 32-bit macro instruction, viewed through overlapping bit fields.
+union Opcode {
+    u32 raw;
+    BitField<0, 3, Operation> operation;
+    BitField<4, 3, ResultOperation> result_operation;
+    BitField<4, 1, BranchCondition> branch_condition;
+    // If set on a branch, then the branch doesn't have a delay slot.
+    BitField<5, 1, u32> branch_annul;
+    BitField<7, 1, u32> is_exit;
+    BitField<8, 3, u32> dst;
+    BitField<11, 3, u32> src_a;
+    BitField<14, 3, u32> src_b;
+    // The signed immediate overlaps the second source operand and the alu operation.
+    BitField<14, 18, s32> immediate;
+
+    BitField<17, 5, ALUOperation> alu_operation;
+
+    // Bitfield instructions data
+    BitField<17, 5, u32> bf_src_bit;
+    BitField<22, 5, u32> bf_size;
+    BitField<27, 5, u32> bf_dst_bit;
+
+    /// Returns a mask with the low `bf_size` bits set.
+    u32 GetBitfieldMask() const {
+        // bf_size is a 5-bit field and can be 31. The shift is done on an unsigned one so
+        // that case yields 0x7FFFFFFF instead of overflowing a signed int.
+        return (1U << bf_size) - 1;
+    }
+
+    /// Returns the immediate scaled by sizeof(u32), i.e. converted from words to bytes.
+    s32 GetBranchTarget() const {
+        return static_cast<s32>(immediate * sizeof(u32));
+    }
+};
+
+/// Method register state: a 12-bit address and a 6-bit increment packed into one word.
+union MethodAddress {
+    u32 raw;
+    BitField<0, 12, u32> address;
+    BitField<12, 6, u32> increment;
+};
+
+} // namespace Macro
+
+class HLEMacro;
+
+/// Interface of a macro that is ready to run, whether compiled from uploaded code or
+/// replaced by an HLE implementation.
+class CachedMacro {
+public:
+ virtual ~CachedMacro() = default;
+ /**
+ * Executes the macro with the specified input parameters.
+ * @param parameters The parameters of the macro
+ * @param method The method value MacroEngine::Execute was called with for this macro
+ */
+ virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
+};
+
+/// Base class of the macro backends. It stores uploaded macro code, compiles it on first
+/// execution through the backend-specific Compile() and caches the result per method.
+class MacroEngine {
+public:
+ explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
+ virtual ~MacroEngine();
+
+ // Appends one word of uploaded macro code to the buffer stored under `method`; the code
+ // is compiled later, the first time the macro is executed.
+ void AddCode(u32 method, u32 data);
+
+ // Compiles the macro if it's not in the cache, and executes the compiled macro
+ void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
+
+protected:
+ // Backend hook: turns uploaded macro code into an executable CachedMacro.
+ virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
+
+private:
+ // Per-method cache entry filled in by Execute().
+ struct CacheInfo {
+ std::unique_ptr<CachedMacro> lle_program{}; // Program produced by Compile()
+ std::unique_ptr<CachedMacro> hle_program{}; // HLE replacement, set when the hash matched
+ u64 hash{}; // Hash of the macro code, used to look up an HLE replacement
+ bool has_hle_program{}; // True when hle_program should be run instead of lle_program
+ };
+
+ std::unordered_map<u32, CacheInfo> macro_cache; // Compiled macros, keyed by method
+ std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; // Raw code, keyed by method
+ std::unique_ptr<HLEMacro> hle_macros; // Source of HLE replacements
+};
+
+std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro_hle.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra {
+
+namespace {
+// HLE'd functions
+/// HLE replacement for the macro whose code hashes to 0x771BB18C62444DA0.
+///
+/// Reads parameters[0] through parameters[5] without bounds checking, so the caller must
+/// supply at least six parameters. Sets up the draw registers from them, issues the draw
+/// through the rasterizer when execution is enabled, then resets the index count, the
+/// instance count and the MME draw mode.
+static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
+
+    // Keep only the low 26 bits of the first parameter. The previous spelling,
+    // ~(0x3ffffff << 26), shifted a signed int past its range (undefined behaviour in
+    // C++17); this unsigned mask is the value that expression was meant to produce.
+    maxwell3d.regs.draw.topology.Assign(
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
+                                                                        0x3ffffffU));
+    maxwell3d.regs.vb_base_instance = parameters[5];
+    maxwell3d.mme_draw.instance_count = instance_count;
+    maxwell3d.regs.vb_element_base = parameters[3];
+    maxwell3d.regs.index_array.count = parameters[1];
+    maxwell3d.regs.index_array.first = parameters[4];
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(true, true);
+    }
+    maxwell3d.regs.index_array.count = 0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+/// HLE replacement for the macro whose code hashes to 0x0D61FC9FAAC9FCAD.
+///
+/// Reads parameters[0] through parameters[4] without bounds checking. Programs the vertex
+/// buffer range, base instance, topology and instance count, issues the draw when execution
+/// is enabled, then clears the vertex count, the instance count and the MME draw mode.
+static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    using Maxwell = Tegra::Engines::Maxwell3D;
+
+    const u32 instance_count = maxwell3d.GetRegisterValue(0xD1B) & parameters[2];
+    const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
+
+    maxwell3d.regs.vertex_buffer.first = parameters[3];
+    maxwell3d.regs.vertex_buffer.count = parameters[1];
+    maxwell3d.regs.vb_base_instance = parameters[4];
+    maxwell3d.regs.draw.topology.Assign(topology);
+    maxwell3d.mme_draw.instance_count = instance_count;
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(false, true);
+    }
+
+    // Leave no stale draw state behind for the next macro.
+    maxwell3d.regs.vertex_buffer.count = 0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.mme_draw.current_mode = Maxwell::MMEDrawMode::Undefined;
+}
+
+// HLE replacement for the macro whose code hashes to 0x0217920100488FF7.
+// Reads parameters[0] through parameters[5] without bounds checking, so the caller must
+// supply at least six parameters.
+// NOTE(review): the Draw(true, true) call suggests an indexed, instanced draw - confirm
+// against RasterizerInterface::Draw.
+static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
+ const std::vector<u32>& parameters) {
+ const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+ const u32 element_base = parameters[4];
+ const u32 base_instance = parameters[5];
+ // Program the draw state from the macro parameters.
+ maxwell3d.regs.index_array.first = parameters[3];
+ maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
+ maxwell3d.regs.index_array.count = parameters[1];
+ maxwell3d.regs.vb_element_base = element_base;
+ maxwell3d.regs.vb_base_instance = base_instance;
+ maxwell3d.mme_draw.instance_count = instance_count;
+ // Forward the element base and base instance through methods 0x8e3-0x8e5.
+ // NOTE(review): the meaning of these method numbers and of 0x640 is not visible here.
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, element_base);
+ maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+ maxwell3d.regs.draw.topology.Assign(
+ static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
+ if (maxwell3d.ShouldExecute()) {
+ maxwell3d.GetRasterizer().Draw(true, true);
+ }
+ // Reset everything that was programmed above, including the values sent via methods.
+ maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+ maxwell3d.regs.index_array.count = 0;
+ maxwell3d.regs.vb_element_base = 0x0;
+ maxwell3d.regs.vb_base_instance = 0x0;
+ maxwell3d.mme_draw.instance_count = 0;
+ maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+ maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+ maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+ maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+} // namespace
+
+/// Table of known macros: hash of the uploaded macro code -> HLE replacement function.
+constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+    {0x771BB18C62444DA0ULL, &HLE_771BB18C62444DA0},
+    {0x0D61FC9FAAC9FCADULL, &HLE_0D61FC9FAAC9FCAD},
+    {0x0217920100488FF7ULL, &HLE_0217920100488FF7},
+}};
+
+// Keeps a reference to the engine; it is handed to every HLEMacroImpl created by
+// GetHLEProgram().
+HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+HLEMacro::~HLEMacro() = default;
+
+/// Looks up an HLE replacement for the macro whose code hashes to `hash`.
+///
+/// @param hash Hash of the macro code.
+/// @return A new HLEMacroImpl wrapping the matching function, or std::nullopt when the hash
+///         is not in hle_funcs.
+std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
+    // The table has a handful of entries, so a linear scan is enough. A plain loop also
+    // avoids depending on <algorithm>, which this file does not include.
+    for (const auto& [known_hash, function] : hle_funcs) {
+        if (known_hash == hash) {
+            return std::make_unique<HLEMacroImpl>(maxwell3d, function);
+        }
+    }
+    return std::nullopt;
+}
+
+HLEMacroImpl::~HLEMacroImpl() = default;
+
+// Binds one HLE function to the engine it will operate on.
+HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
+ : maxwell3d(maxwell3d), func(func) {}
+
+// Runs the bound HLE function with the macro parameters. `method` is part of the
+// CachedMacro interface but is not used by the HLE path.
+void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
+ func(maxwell3d, parameters);
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/macro/macro.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
+
+/// Factory for HLE macro replacements, looked up by the hash of the macro code.
+class HLEMacro {
+public:
+ explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
+ ~HLEMacro();
+
+ /// Returns an HLE implementation for the macro with the given code hash, or std::nullopt
+ /// when no replacement is known for that hash.
+ std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
+
+private:
+ Engines::Maxwell3D& maxwell3d; // Engine passed on to the created HLE programs
+};
+
+/// CachedMacro that runs a native HLE function instead of compiled macro code.
+class HLEMacroImpl : public CachedMacro {
+public:
+    /// @param maxwell3d Engine the HLE function operates on.
+    /// @param func      HLE function invoked by Execute().
+    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
+    // Marked override like Execute() below, so the compiler checks that the base destructor
+    // stays virtual.
+    ~HLEMacroImpl() override;
+
+    /// Calls the HLE function with `parameters`.
+    void Execute(const std::vector<u32>& parameters, u32 method) override;
+
+private:
+    Engines::Maxwell3D& maxwell3d;
+    HLEFunction func;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 947364928..aa5256419 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -1,4 +1,4 @@
-// Copyright 2018 yuzu Emulator Project
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@@ -6,109 +6,47 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/macro_interpreter.h"
+#include "video_core/macro/macro_interpreter.h"
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
namespace Tegra {
-namespace {
-enum class Operation : u32 {
- ALU = 0,
- AddImmediate = 1,
- ExtractInsert = 2,
- ExtractShiftLeftImmediate = 3,
- ExtractShiftLeftRegister = 4,
- Read = 5,
- Unused = 6, // This operation doesn't seem to be a valid encoding.
- Branch = 7,
-};
-} // Anonymous namespace
-
-enum class MacroInterpreter::ALUOperation : u32 {
- Add = 0,
- AddWithCarry = 1,
- Subtract = 2,
- SubtractWithBorrow = 3,
- // Operations 4-7 don't seem to be valid encodings.
- Xor = 8,
- Or = 9,
- And = 10,
- AndNot = 11,
- Nand = 12
-};
-
-enum class MacroInterpreter::ResultOperation : u32 {
- IgnoreAndFetch = 0,
- Move = 1,
- MoveAndSetMethod = 2,
- FetchAndSend = 3,
- MoveAndSend = 4,
- FetchAndSetMethod = 5,
- MoveAndSetMethodFetchAndSend = 6,
- MoveAndSetMethodSend = 7
-};
-
-enum class MacroInterpreter::BranchCondition : u32 {
- Zero = 0,
- NotZero = 1,
-};
-
-union MacroInterpreter::Opcode {
- u32 raw;
- BitField<0, 3, Operation> operation;
- BitField<4, 3, ResultOperation> result_operation;
- BitField<4, 1, BranchCondition> branch_condition;
- // If set on a branch, then the branch doesn't have a delay slot.
- BitField<5, 1, u32> branch_annul;
- BitField<7, 1, u32> is_exit;
- BitField<8, 3, u32> dst;
- BitField<11, 3, u32> src_a;
- BitField<14, 3, u32> src_b;
- // The signed immediate overlaps the second source operand and the alu operation.
- BitField<14, 18, s32> immediate;
-
- BitField<17, 5, ALUOperation> alu_operation;
-
- // Bitfield instructions data
- BitField<17, 5, u32> bf_src_bit;
- BitField<22, 5, u32> bf_size;
- BitField<27, 5, u32> bf_dst_bit;
-
- u32 GetBitfieldMask() const {
- return (1 << bf_size) - 1;
- }
+/// Builds the interpreter-backed macro engine on top of `maxwell3d`.
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
- s32 GetBranchTarget() const {
- return static_cast<s32>(immediate * sizeof(u32));
- }
-};
+/// Creates an interpreter-backed CachedMacro for the macro program `code`.
+std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
+    std::unique_ptr<CachedMacro> macro = std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
+    return macro;
+}
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+/// Binds the interpreter to `maxwell3d` and to the macro program `code`.
+/// NOTE(review): the `code` member is declared as a const reference, so the vector passed in
+/// must outlive this object - confirm the caller guarantees that.
+MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
+ const std::vector<u32>& code)
+ : maxwell3d(maxwell3d), code(code) {}
-void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) {
+void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
MICROPROFILE_SCOPE(MacroInterp);
Reset();
registers[1] = parameters[0];
+ num_parameters = parameters.size();
if (num_parameters > parameters_capacity) {
parameters_capacity = num_parameters;
this->parameters = std::make_unique<u32[]>(num_parameters);
}
- std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32));
+ std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
this->num_parameters = num_parameters;
// Execute the code until we hit an exit condition.
bool keep_executing = true;
while (keep_executing) {
- keep_executing = Step(offset, false);
+ keep_executing = Step(false);
}
// Assert the the macro used all the input parameters
ASSERT(next_parameter_index == num_parameters);
}
-void MacroInterpreter::Reset() {
+void MacroInterpreterImpl::Reset() {
registers = {};
pc = 0;
delayed_pc = {};
@@ -120,10 +58,10 @@ void MacroInterpreter::Reset() {
carry_flag = false;
}
-bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
+bool MacroInterpreterImpl::Step(bool is_delay_slot) {
u32 base_address = pc;
- Opcode opcode = GetOpcode(offset);
+ Macro::Opcode opcode = GetOpcode();
pc += 4;
// Update the program counter if we were delayed
@@ -134,18 +72,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
}
switch (opcode.operation) {
- case Operation::ALU: {
+ case Macro::Operation::ALU: {
u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
GetRegister(opcode.src_b));
ProcessResult(opcode.result_operation, opcode.dst, result);
break;
}
- case Operation::AddImmediate: {
+ case Macro::Operation::AddImmediate: {
ProcessResult(opcode.result_operation, opcode.dst,
GetRegister(opcode.src_a) + opcode.immediate);
break;
}
- case Operation::ExtractInsert: {
+ case Macro::Operation::ExtractInsert: {
u32 dst = GetRegister(opcode.src_a);
u32 src = GetRegister(opcode.src_b);
@@ -155,7 +93,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
ProcessResult(opcode.result_operation, opcode.dst, dst);
break;
}
- case Operation::ExtractShiftLeftImmediate: {
+ case Macro::Operation::ExtractShiftLeftImmediate: {
u32 dst = GetRegister(opcode.src_a);
u32 src = GetRegister(opcode.src_b);
@@ -164,7 +102,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
ProcessResult(opcode.result_operation, opcode.dst, result);
break;
}
- case Operation::ExtractShiftLeftRegister: {
+ case Macro::Operation::ExtractShiftLeftRegister: {
u32 dst = GetRegister(opcode.src_a);
u32 src = GetRegister(opcode.src_b);
@@ -173,12 +111,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
ProcessResult(opcode.result_operation, opcode.dst, result);
break;
}
- case Operation::Read: {
+ case Macro::Operation::Read: {
u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
ProcessResult(opcode.result_operation, opcode.dst, result);
break;
}
- case Operation::Branch: {
+ case Macro::Operation::Branch: {
ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
u32 value = GetRegister(opcode.src_a);
bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
@@ -191,7 +129,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot.
- return Step(offset, true);
+ return Step(true);
}
break;
}
@@ -204,51 +142,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
// cause an exit if it's executed inside a delay slot.
if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction
- Step(offset, true);
+ Step(true);
return false;
}
return true;
}
-MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
- const auto& macro_memory{maxwell3d.GetMacroMemory()};
- ASSERT((pc % sizeof(u32)) == 0);
- ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
- return {macro_memory[offset + pc / sizeof(u32)]};
-}
-
-u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) {
+u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
switch (operation) {
- case ALUOperation::Add: {
+ case Macro::ALUOperation::Add: {
const u64 result{static_cast<u64>(src_a) + src_b};
carry_flag = result > 0xffffffff;
return static_cast<u32>(result);
}
- case ALUOperation::AddWithCarry: {
+ case Macro::ALUOperation::AddWithCarry: {
const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
carry_flag = result > 0xffffffff;
return static_cast<u32>(result);
}
- case ALUOperation::Subtract: {
+ case Macro::ALUOperation::Subtract: {
const u64 result{static_cast<u64>(src_a) - src_b};
carry_flag = result < 0x100000000;
return static_cast<u32>(result);
}
- case ALUOperation::SubtractWithBorrow: {
+ case Macro::ALUOperation::SubtractWithBorrow: {
const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
carry_flag = result < 0x100000000;
return static_cast<u32>(result);
}
- case ALUOperation::Xor:
+ case Macro::ALUOperation::Xor:
return src_a ^ src_b;
- case ALUOperation::Or:
+ case Macro::ALUOperation::Or:
return src_a | src_b;
- case ALUOperation::And:
+ case Macro::ALUOperation::And:
return src_a & src_b;
- case ALUOperation::AndNot:
+ case Macro::ALUOperation::AndNot:
return src_a & ~src_b;
- case ALUOperation::Nand:
+ case Macro::ALUOperation::Nand:
return ~(src_a & src_b);
default:
@@ -257,43 +188,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b)
}
}
-void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) {
+void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
switch (operation) {
- case ResultOperation::IgnoreAndFetch:
+ case Macro::ResultOperation::IgnoreAndFetch:
// Fetch parameter and ignore result.
SetRegister(reg, FetchParameter());
break;
- case ResultOperation::Move:
+ case Macro::ResultOperation::Move:
// Move result.
SetRegister(reg, result);
break;
- case ResultOperation::MoveAndSetMethod:
+ case Macro::ResultOperation::MoveAndSetMethod:
// Move result and use as Method Address.
SetRegister(reg, result);
SetMethodAddress(result);
break;
- case ResultOperation::FetchAndSend:
+ case Macro::ResultOperation::FetchAndSend:
// Fetch parameter and send result.
SetRegister(reg, FetchParameter());
Send(result);
break;
- case ResultOperation::MoveAndSend:
+ case Macro::ResultOperation::MoveAndSend:
// Move and send result.
SetRegister(reg, result);
Send(result);
break;
- case ResultOperation::FetchAndSetMethod:
+ case Macro::ResultOperation::FetchAndSetMethod:
// Fetch parameter and use result as Method Address.
SetRegister(reg, FetchParameter());
SetMethodAddress(result);
break;
- case ResultOperation::MoveAndSetMethodFetchAndSend:
+ case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
// Move result and use as Method Address, then fetch and send parameter.
SetRegister(reg, result);
SetMethodAddress(result);
Send(FetchParameter());
break;
- case ResultOperation::MoveAndSetMethodSend:
+ case Macro::ResultOperation::MoveAndSetMethodSend:
// Move result and use as Method Address, then send bits 12:17 of result.
SetRegister(reg, result);
SetMethodAddress(result);
@@ -304,16 +235,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
}
}
-u32 MacroInterpreter::FetchParameter() {
- ASSERT(next_parameter_index < num_parameters);
- return parameters[next_parameter_index++];
+/// Decides whether a branch with condition `cond` is taken for the tested register `value`.
+bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
+    const bool is_zero = value == 0;
+    if (cond == Macro::BranchCondition::Zero) {
+        return is_zero;
+    }
+    if (cond == Macro::BranchCondition::NotZero) {
+        return !is_zero;
+    }
+    // Any other encoding is not expected.
+    UNREACHABLE();
+    return true;
+}
+
+Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
+ ASSERT((pc % sizeof(u32)) == 0);
+ ASSERT(pc < code.size() * sizeof(u32));
+ return {code[pc / sizeof(u32)]};
}
-u32 MacroInterpreter::GetRegister(u32 register_id) const {
+u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
return registers.at(register_id);
}
-void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
+void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
// Register 0 is hardwired as the zero register.
// Ensure no writes to it actually occur.
if (register_id == 0) {
@@ -323,30 +266,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
registers.at(register_id) = value;
}
-void MacroInterpreter::SetMethodAddress(u32 address) {
+void MacroInterpreterImpl::SetMethodAddress(u32 address) {
method_address.raw = address;
}
-void MacroInterpreter::Send(u32 value) {
+void MacroInterpreterImpl::Send(u32 value) {
maxwell3d.CallMethodFromMME(method_address.address, value);
// Increment the method address by the method increment.
method_address.address.Assign(method_address.address.Value() +
method_address.increment.Value());
}
-u32 MacroInterpreter::Read(u32 method) const {
+u32 MacroInterpreterImpl::Read(u32 method) const {
return maxwell3d.GetRegisterValue(method);
}
-bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const {
- switch (cond) {
- case BranchCondition::Zero:
- return value == 0;
- case BranchCondition::NotZero:
- return value != 0;
- }
- UNREACHABLE();
- return true;
+u32 MacroInterpreterImpl::FetchParameter() {
+ ASSERT(next_parameter_index < num_parameters);
+ return parameters[next_parameter_index++];
}
} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index 631146d89..90217fc89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -1,44 +1,37 @@
-// Copyright 2018 yuzu Emulator Project
+// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
-
#include <array>
#include <optional>
-
+#include <vector>
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "video_core/macro/macro.h"
namespace Tegra {
namespace Engines {
class Maxwell3D;
}
-class MacroInterpreter final {
+class MacroInterpreter final : public MacroEngine {
public:
explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
- /**
- * Executes the macro code with the specified input parameters.
- * @param offset Offset to start execution at.
- * @param parameters The parameters of the macro.
- */
- void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
+protected:
+ std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
private:
- enum class ALUOperation : u32;
- enum class BranchCondition : u32;
- enum class ResultOperation : u32;
-
- union Opcode;
+ Engines::Maxwell3D& maxwell3d;
+};
- union MethodAddress {
- u32 raw;
- BitField<0, 12, u32> address;
- BitField<12, 6, u32> increment;
- };
+class MacroInterpreterImpl : public CachedMacro {
+public:
+ MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
+ void Execute(const std::vector<u32>& parameters, u32 method) override;
+private:
/// Resets the execution engine state, zeroing registers, etc.
void Reset();
@@ -49,20 +42,20 @@ private:
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
* previous instruction.
*/
- bool Step(u32 offset, bool is_delay_slot);
+ bool Step(bool is_delay_slot);
/// Calculates the result of an ALU operation. src_a OP src_b;
- u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b);
+ u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
/// Performs the result operation on the input result and stores it in the specified register
/// (if necessary).
- void ProcessResult(ResultOperation operation, u32 reg, u32 result);
+ void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
/// Evaluates the branch condition and returns whether the branch should be taken or not.
- bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
+ bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
/// Reads an opcode at the current program counter location.
- Opcode GetOpcode(u32 offset) const;
+ Macro::Opcode GetOpcode() const;
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
u32 GetRegister(u32 register_id) const;
@@ -89,13 +82,11 @@ private:
/// Program counter to execute at after the delay slot is executed.
std::optional<u32> delayed_pc;
- static constexpr std::size_t NumMacroRegisters = 8;
-
/// General purpose macro registers.
- std::array<u32, NumMacroRegisters> registers = {};
+ std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
/// Method address to use for the next Send instruction.
- MethodAddress method_address = {};
+ Macro::MethodAddress method_address = {};
/// Input parameters of the current macro.
std::unique_ptr<u32[]> parameters;
@@ -105,5 +96,7 @@ private:
u32 next_parameter_index = 0;
bool carry_flag = false;
+ const std::vector<u32>& code;
};
+
} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
new file mode 100644
index 000000000..07292702f
--- /dev/null
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -0,0 +1,621 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/x64/xbyak_util.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro_interpreter.h"
+#include "video_core/macro/macro_jit_x64.h"
+
+MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47));
+MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
+
+namespace Tegra {
+// Host registers the generated code keeps live for the whole macro; MacroJITx64Impl::Compile
+// initialises each of them in the prologue it emits.
+static const Xbyak::Reg64 STATE = Xbyak::util::rbx; // Pointer to the JITState argument
+static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; // Result of the current macro instruction
+static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; // Cursor into the parameter array
+static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; // Method address/increment word
+static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; // Pending jump target, 0 if none
+
+// Set of the registers above; Compile_Send saves its caller-saved subset around host calls.
+static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
+ STATE,
+ RESULT,
+ PARAMETERS,
+ METHOD_ADDRESS,
+ BRANCH_HOLDER,
+});
+
+/// Builds the x64 JIT-backed macro engine on top of `maxwell3d`.
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
+
+/// Creates a JIT-compiled CachedMacro for the macro program `code`.
+std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
+    std::unique_ptr<CachedMacro> macro = std::make_unique<MacroJITx64Impl>(maxwell3d, code);
+    return macro;
+}
+
+/// Reserves MAX_CODE_SIZE bytes for generated code, records the macro program and engine, and
+/// translates the program immediately by calling Compile().
+MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
+ : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) {
+ Compile();
+}
+
+// Defined out of line; nothing beyond member destruction is required.
+MacroJITx64Impl::~MacroJITx64Impl() = default;
+
+/// Runs the compiled macro with a fresh JITState and the given parameter words.
+/// NOTE(review): the generated prologue reads the first parameter unconditionally, so an empty
+/// `parameters` vector looks unsafe here - confirm callers always pass at least one word.
+void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
+    MICROPROFILE_SCOPE(MacroJitExecute);
+    ASSERT_OR_EXECUTE(program != nullptr, { return; });
+    // Value-initialisation zeroes the registers and the carry flag for every invocation.
+    JITState state{};
+    state.maxwell3d = &maxwell3d;
+    const u32* const parameter_words = parameters.data();
+    program(&state, parameter_words);
+}
+
+/// Emits host code for a macro ALU instruction.
+///
+/// Loads src_a into RESULT and src_b into eax, applies opcode.alu_operation so that the value
+/// ends up in RESULT, then emits the result operation for opcode.dst.
+///
+/// Both operands are always materialised through Compile_GetRegister (which emits
+/// `xor dst, dst` for macro register 0) and the operation itself is always emitted. The
+/// earlier zero-register shortcut skipped them, which
+///  - left RESULT holding the previous instruction's value whenever src_a was register 0,
+///  - yielded src_a instead of 0 for And when only src_b was register 0,
+///  - passed a default-constructed Xbyak register to not_/and_ for AndNot and Nand when only
+///    src_b was register 0, and
+///  - stored the carry flag from stale host flags for Add when the add was skipped.
+///
+/// @param opcode Decoded macro instruction whose operation is Macro::Operation::ALU.
+///
+/// NOTE(review): for Subtract/SubtractWithBorrow the stored carry is the host borrow flag,
+/// while MacroInterpreterImpl::GetALUResult stores the inverse (set when no borrow occurred).
+/// The two only diverge when add-type and subtract-type carry users are mixed - confirm which
+/// convention is intended.
+void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
+    const Xbyak::Reg32 src_a = Compile_GetRegister(opcode.src_a, RESULT);
+    const Xbyak::Reg32 src_b = Compile_GetRegister(opcode.src_b, eax);
+
+    // Copies the host carry flag into JITState::carry_flag.
+    const auto store_carry = [this] { setc(byte[STATE + offsetof(JITState, carry_flag)]); };
+    // Copies bit 0 of JITState::carry_flag into the host carry flag.
+    const auto load_carry = [this] { bt(dword[STATE + offsetof(JITState, carry_flag)], 0); };
+
+    switch (opcode.alu_operation) {
+    case Macro::ALUOperation::Add:
+        add(src_a, src_b);
+        // Only track the carry when some instruction in the macro consumes it.
+        if (!optimizer.can_skip_carry) {
+            store_carry();
+        }
+        break;
+    case Macro::ALUOperation::AddWithCarry:
+        load_carry();
+        adc(src_a, src_b);
+        store_carry();
+        break;
+    case Macro::ALUOperation::Subtract:
+        sub(src_a, src_b);
+        if (!optimizer.can_skip_carry) {
+            store_carry();
+        }
+        break;
+    case Macro::ALUOperation::SubtractWithBorrow:
+        load_carry();
+        sbb(src_a, src_b);
+        store_carry();
+        break;
+    case Macro::ALUOperation::Xor:
+        xor_(src_a, src_b);
+        break;
+    case Macro::ALUOperation::Or:
+        or_(src_a, src_b);
+        break;
+    case Macro::ALUOperation::And:
+        and_(src_a, src_b);
+        break;
+    case Macro::ALUOperation::AndNot:
+        // src_b lives in a scratch register, so it can be inverted in place.
+        not_(src_b);
+        and_(src_a, src_b);
+        break;
+    case Macro::ALUOperation::Nand:
+        and_(src_a, src_b);
+        not_(src_a);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
+                          static_cast<std::size_t>(opcode.alu_operation.Value()));
+        break;
+    }
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Emits host code for an AddImmediate instruction: RESULT = reg[src_a] + immediate, followed
+/// by the result operation for opcode.dst.
+///
+/// Fix: an immediate of exactly 2 previously matched none of the `> 2`, `== 1`, `< 0` arms, so
+/// no addition was emitted and the unmodified register value was used as the result. Every
+/// positive immediate other than 1 now goes through `add`.
+///
+/// @param opcode Decoded macro instruction whose operation is Macro::Operation::AddImmediate.
+void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
+    if (optimizer.skip_dummy_addimmediate) {
+        // Games tend to use this as an exit instruction placeholder. It's to encode an
+        // instruction without doing anything. In our case we can just not emit anything.
+        if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
+            return;
+        }
+    }
+    // Check for redundant moves: when the next instruction also moves into the same register
+    // and sets the method address, this one is dropped entirely.
+    // NOTE(review): the check does not look at the next instruction's sources, so it appears
+    // to drop this instruction even if the next one reads opcode.dst - confirm this is safe.
+    if (optimizer.optimize_for_method_move &&
+        opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
+        next_opcode.has_value()) {
+        const auto next = *next_opcode;
+        if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
+            opcode.dst == next.dst) {
+            return;
+        }
+    }
+    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
+        // Source is the hard-wired zero register, so the result is the immediate itself.
+        if (opcode.immediate == 0) {
+            xor_(RESULT, RESULT);
+        } else {
+            mov(RESULT, opcode.immediate);
+        }
+    } else {
+        auto result = Compile_GetRegister(opcode.src_a, RESULT);
+        if (opcode.immediate > 1) {
+            add(result, opcode.immediate);
+        } else if (opcode.immediate == 1) {
+            inc(result);
+        } else if (opcode.immediate < 0) {
+            sub(result, opcode.immediate * -1);
+        }
+        // An immediate of 0 needs no arithmetic.
+    }
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Emits host code for ExtractInsert: a bit field taken from reg[src_b] is placed into
+/// reg[src_a] at bf_dst_bit, and the combined value in RESULT goes to the result operation.
+/// NOTE(review): field values of 31 (source bit, destination bit) are emitted as "clear the
+/// extracted value" rather than a shift by 31, and a size of 31 skips the mask - confirm this
+/// matches the interpreter.
+void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
+    const u32 src_bit = opcode.bf_src_bit.Value();
+    const u32 field_size = opcode.bf_size.Value();
+    const u32 dst_bit = opcode.bf_dst_bit.Value();
+
+    auto target = Compile_GetRegister(opcode.src_a, RESULT);
+    auto field = Compile_GetRegister(opcode.src_b, eax);
+
+    // Move the field down to bit 0.
+    if (src_bit == 31) {
+        xor_(field, field);
+    } else if (src_bit != 0) {
+        shr(field, src_bit);
+    }
+    // Don't bother masking the whole register since we're using a 32 bit register
+    if (field_size == 0) {
+        xor_(field, field);
+    } else if (field_size != 31) {
+        and_(field, opcode.GetBitfieldMask());
+    }
+    // Move the field up to its destination position.
+    if (dst_bit == 31) {
+        xor_(field, field);
+    } else if (dst_bit != 0) {
+        shl(field, dst_bit);
+    }
+
+    // Clear the destination bits unless the mask would keep every bit anyway.
+    const u32 keep_mask = ~(opcode.GetBitfieldMask() << dst_bit);
+    if (keep_mask != 0xffffffff) {
+        and_(target, keep_mask);
+    }
+    or_(target, field);
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Emits host code for ExtractShiftLeftImmediate: reg[src_b] is shifted right by the low byte
+/// of reg[src_a], masked to bf_size bits and shifted left by the immediate bf_dst_bit.
+void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
+    // The variable shift count has to live in cl, hence ecx.
+    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
+    const auto value = Compile_GetRegister(opcode.src_b, RESULT);
+    const u32 field_size = opcode.bf_size.Value();
+    const u32 dst_bit = opcode.bf_dst_bit.Value();
+
+    shr(value, shift_amount.cvt8());
+    if (field_size == 0) {
+        xor_(value, value);
+    } else if (field_size != 31) {
+        and_(value, opcode.GetBitfieldMask());
+    }
+
+    if (dst_bit == 31) {
+        xor_(value, value);
+    } else if (dst_bit != 0) {
+        shl(value, dst_bit);
+    }
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Emits host code for ExtractShiftLeftRegister: reg[src_b] is shifted right by the immediate
+/// bf_src_bit, masked to bf_size bits and shifted left by the low byte of reg[src_a].
+void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
+    // The variable shift count has to live in cl, hence ecx.
+    const auto shift_amount = Compile_GetRegister(opcode.src_a, ecx);
+    const auto value = Compile_GetRegister(opcode.src_b, RESULT);
+    const u32 src_bit = opcode.bf_src_bit.Value();
+    const bool needs_mask = opcode.bf_size != 31;
+
+    if (src_bit != 0) {
+        shr(value, src_bit);
+    }
+    if (needs_mask) {
+        and_(value, opcode.GetBitfieldMask());
+    }
+    shl(value, shift_amount.cvt8());
+
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Emits host code for a Read instruction: computes the register index reg[src_a] + immediate
+/// in RESULT, loads that entry of the Maxwell3D register array into RESULT, and emits the
+/// result operation for opcode.dst.
+///
+/// Fix: an immediate of exactly 2 previously matched none of the `> 2`, `== 1`, `< 0` arms, so
+/// the index was left at reg[src_a] and the wrong engine register was read. Every positive
+/// immediate other than 1 now goes through `add`.
+///
+/// @param opcode Decoded macro instruction whose operation is Macro::Operation::Read.
+void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
+    if (optimizer.zero_reg_skip && opcode.src_a == 0) {
+        // Source is the hard-wired zero register, so the index is the immediate itself.
+        if (opcode.immediate == 0) {
+            xor_(RESULT, RESULT);
+        } else {
+            mov(RESULT, opcode.immediate);
+        }
+    } else {
+        auto result = Compile_GetRegister(opcode.src_a, RESULT);
+        if (opcode.immediate > 1) {
+            add(result, opcode.immediate);
+        } else if (opcode.immediate == 1) {
+            inc(result);
+        } else if (opcode.immediate < 0) {
+            sub(result, opcode.immediate * -1);
+        }
+        // An immediate of 0 needs no arithmetic.
+    }
+
+    // Equivalent to Engines::Maxwell3D::GetRegisterValue:
+    if (optimizer.enable_asserts) {
+        // Trap in the generated code when the index is outside the register array.
+        Xbyak::Label pass_range_check;
+        cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
+        jb(pass_range_check);
+        int3();
+        L(pass_range_check);
+    }
+    // JITState::maxwell3d sits at offset 0 of the state block.
+    mov(rax, qword[STATE]);
+    mov(RESULT,
+        dword[rax + offsetof(Engines::Maxwell3D, regs) +
+              offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
+
+    Compile_ProcessResult(opcode.result_operation, opcode.dst);
+}
+
+/// Host-side helper called from generated code: forwards `value` to the method selected by
+/// the address field of `method_address`.
+static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
+    (*maxwell3d).CallMethodFromMME(method_address.address, value);
+}
+
+/// Emits a call to the host-side Send() helper with the current method address word and
+/// `value`, followed by code that advances the method address by its increment field.
+void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
+    // Preserve the persistent registers that the host call is allowed to clobber.
+    const std::bitset<32> saved_regs = PersistentCallerSavedRegs();
+    Common::X64::ABI_PushRegistersAndAdjustStack(*this, saved_regs, 0);
+    mov(Common::X64::ABI_PARAM1, qword[STATE]);
+    mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
+    mov(Common::X64::ABI_PARAM3, value);
+    Common::X64::CallFarFunction(*this, &Send);
+    Common::X64::ABI_PopRegistersAndAdjustStack(*this, saved_regs, 0);
+
+    // Bits 12..17 hold the increment; when they are all zero the address stays as it is.
+    Xbyak::Label skip_increment{};
+    test(METHOD_ADDRESS, 0x3f000);
+    je(skip_increment);
+
+    // Rebuild the word as (address + increment) | (increment << 12).
+    mov(ecx, METHOD_ADDRESS);
+    and_(METHOD_ADDRESS, 0xfff);
+    shr(ecx, 12);
+    and_(ecx, 0x3f);
+    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
+    sal(ecx, 12);
+    or_(eax, ecx);
+    mov(METHOD_ADDRESS, eax);
+
+    L(skip_increment);
+}
+
+/// Emits host code for a conditional branch on reg[src_a].
+///
+/// When the macro contains delay-slot branches (optimizer.has_delayed_pc), a taken branch
+/// either jumps straight to its target (annulled branch) or parks a continuation address in
+/// BRANCH_HOLDER and lets the next instruction run first. Otherwise a plain conditional jump
+/// to the target label is emitted.
+/// NOTE(review): `jump_address` indexes `labels` without a range check - confirm branch
+/// targets are always inside the macro.
+void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
+    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
+    const s32 target_offset = static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
+    const s32 jump_address = static_cast<s32>(pc) + target_offset;
+    const auto condition = opcode.branch_condition;
+
+    Xbyak::Label end;
+    const auto value = Compile_GetRegister(opcode.src_a, eax);
+    test(value, value);
+
+    if (!optimizer.has_delayed_pc) {
+        // No delay slots anywhere in this macro: branch directly on the condition.
+        if (condition == Macro::BranchCondition::Zero) {
+            je(labels[jump_address], T_NEAR);
+        } else if (condition == Macro::BranchCondition::NotZero) {
+            jne(labels[jump_address], T_NEAR);
+        }
+        L(end);
+        return;
+    }
+
+    // Fall through to `end` when the branch is not taken.
+    if (condition == Macro::BranchCondition::Zero) {
+        jne(end, T_NEAR);
+    } else if (condition == Macro::BranchCondition::NotZero) {
+        je(end, T_NEAR);
+    }
+
+    if (opcode.branch_annul) {
+        // Annulled branch: no delay slot, jump immediately.
+        xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+        jmp(labels[jump_address], T_NEAR);
+    } else {
+        Xbyak::Label handle_post_exit{};
+        Xbyak::Label skip{};
+        jmp(skip, T_NEAR);
+
+        // Continuation reached through BRANCH_HOLDER once the delay slot has run.
+        L(handle_post_exit);
+        if (opcode.is_exit) {
+            // Execute 1 instruction
+            mov(BRANCH_HOLDER, end_of_code);
+        } else {
+            xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+        }
+        // Jump to next instruction to skip delay slot check
+        jmp(labels[jump_address], T_NEAR);
+
+        L(skip);
+        mov(BRANCH_HOLDER, handle_post_exit);
+        jmp(delay_skip[pc], T_NEAR);
+    }
+
+    L(end);
+}
+
+/// Scans the whole macro once to decide which bookkeeping the emitted code can leave out:
+/// carry tracking (no AddWithCarry/SubtractWithBorrow present) and delay-slot handling (no
+/// branch without the annul bit present).
+void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
+    bool carry_is_consumed = false;
+    bool has_delay_slot_branch = false;
+
+    for (const auto raw_op : code) {
+        Macro::Opcode op{};
+        op.raw = raw_op;
+
+        // Only these two ALU operations read the carry flag.
+        const bool is_alu = op.operation == Macro::Operation::ALU;
+        if (is_alu && (op.alu_operation == Macro::ALUOperation::AddWithCarry ||
+                       op.alu_operation == Macro::ALUOperation::SubtractWithBorrow)) {
+            carry_is_consumed = true;
+        }
+
+        // A branch without the annul bit executes a delay slot.
+        const bool is_branch = op.operation == Macro::Operation::Branch;
+        if (is_branch && !op.branch_annul) {
+            has_delay_slot_branch = true;
+        }
+    }
+
+    optimizer.can_skip_carry = !carry_is_consumed;
+    optimizer.has_delayed_pc = has_delay_slot_branch;
+}
+
+/// Translates the whole macro into host code: emits the prologue, one code sequence per macro
+/// instruction, and the epilogue, then publishes the entry point in `program`.
+void MacroJITx64Impl::Compile() {
+    MICROPROFILE_SCOPE(MacroJitCompile);
+    labels.fill(Xbyak::Label());
+
+    // Prologue: save callee-saved registers and set up the persistent registers.
+    Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    // JIT state
+    mov(STATE, Common::X64::ABI_PARAM1);
+    mov(PARAMETERS, Common::X64::ABI_PARAM2);
+    xor_(RESULT, RESULT);
+    xor_(METHOD_ADDRESS, METHOD_ADDRESS);
+    xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+
+    // Macro register 1 starts out holding the first parameter.
+    const auto first_parameter = Compile_FetchParameter();
+    mov(dword[STATE + offsetof(JITState, registers) + 4], first_parameter);
+
+    // Track get register for zero registers and mark it as no-op
+    optimizer.zero_reg_skip = true;
+    // AddImmediate tends to be used as a NOP instruction, if we detect this we can
+    // completely skip the entire code path and not emit anything
+    optimizer.skip_dummy_addimmediate = true;
+    // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only
+    // emitting one if our register isn't "dirty"
+    optimizer.optimize_for_method_move = true;
+    // Enable run-time assertions in JITted code
+    optimizer.enable_asserts = false;
+    // Check to see if we can skip emitting certain instructions
+    Optimizer_ScanFlags();
+
+    const u32 op_count = static_cast<u32>(code.size());
+    for (u32 index = 0; index < op_count; index++) {
+        // Peek at the following instruction (if any) so the emitters can look ahead.
+        const bool has_successor = index + 1 < op_count;
+        if (has_successor) {
+            pc = index + 1;
+            next_opcode = GetOpCode();
+        } else {
+            next_opcode = {};
+        }
+        pc = index;
+        Compile_NextInstruction();
+    }
+
+    // Epilogue: common exit point for every path out of the macro.
+    L(end_of_code);
+    Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
+    ret();
+    ready();
+    program = getCode<ProgramType>();
+}
+
+/// Emits host code for the macro instruction at `pc`, including the exit / delay-slot
+/// bookkeeping that follows every instruction.
+/// @returns false when the instruction was already emitted or carries the exit bit, true
+///          otherwise.
+/// NOTE(review): for an exit instruction the delay-slot path references labels[pc + 1];
+/// confirm an exit is never the last word of a macro.
+bool MacroJITx64Impl::Compile_NextInstruction() {
+    const auto opcode = GetOpCode();
+    if (labels[pc].getAddress() != nullptr) {
+        // This instruction has already been bound to generated code.
+        return false;
+    }
+
+    L(labels[pc]);
+
+    switch (opcode.operation) {
+    case Macro::Operation::ALU:
+        Compile_ALU(opcode);
+        break;
+    case Macro::Operation::AddImmediate:
+        Compile_AddImmediate(opcode);
+        break;
+    case Macro::Operation::ExtractInsert:
+        Compile_ExtractInsert(opcode);
+        break;
+    case Macro::Operation::ExtractShiftLeftImmediate:
+        Compile_ExtractShiftLeftImmediate(opcode);
+        break;
+    case Macro::Operation::ExtractShiftLeftRegister:
+        Compile_ExtractShiftLeftRegister(opcode);
+        break;
+    case Macro::Operation::Read:
+        Compile_Read(opcode);
+        break;
+    case Macro::Operation::Branch:
+        Compile_Branch(opcode);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value());
+        break;
+    }
+
+    const bool exits = opcode.is_exit.Value() != 0;
+
+    if (!optimizer.has_delayed_pc) {
+        // A non-zero BRANCH_HOLDER means the previous instruction requested the exit.
+        test(BRANCH_HOLDER, BRANCH_HOLDER);
+        jne(end_of_code, T_NEAR);
+        if (exits) {
+            inc(BRANCH_HOLDER);
+        }
+        return !exits;
+    }
+
+    if (exits) {
+        // With nothing pending, schedule the exit and run one more instruction first.
+        mov(rax, end_of_code);
+        test(BRANCH_HOLDER, BRANCH_HOLDER);
+        cmove(BRANCH_HOLDER, rax);
+        // Jump to next instruction to skip delay slot check
+        je(labels[pc + 1], T_NEAR);
+    } else {
+        // TODO(ogniK): Optimize delay slot branching
+        Xbyak::Label no_delay_slot{};
+        test(BRANCH_HOLDER, BRANCH_HOLDER);
+        je(no_delay_slot, T_NEAR);
+        // Consume the pending target and jump to it.
+        mov(rax, BRANCH_HOLDER);
+        xor_(BRANCH_HOLDER, BRANCH_HOLDER);
+        jmp(rax);
+        L(no_delay_slot);
+    }
+    L(delay_skip[pc]);
+    return !exits;
+}
+
+/// Emits code that loads the next parameter word into eax and advances the PARAMETERS cursor.
+/// @returns the host register (eax) that holds the fetched word.
+Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
+    const Xbyak::Reg32 fetched = eax;
+    mov(fetched, dword[PARAMETERS]);
+    add(PARAMETERS, sizeof(u32));
+    return fetched;
+}
+
+/// Emits code that loads macro register `index` into host register `dst`.
+/// @returns `dst`, so the call can be used inline as an operand.
+Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
+    if (index != 0) {
+        mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
+        return dst;
+    }
+    // Register 0 is always zero
+    xor_(dst, dst);
+    return dst;
+}
+
+/// Emits the result stage of a macro instruction: depending on `operation`, stores RESULT or
+/// a freshly fetched parameter into macro register `reg`, updates the method address and/or
+/// sends a value to the engine.
+void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
+    // Stores `source` into macro register `index`; writes to register 0 are dropped because
+    // that register must always read as zero (NOP is encoded as a store to it).
+    const auto store_register = [this](u32 index, const Xbyak::Reg32& source) {
+        if (index != 0) {
+            mov(dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)], source);
+        }
+    };
+    // Latches `source` as the method address word used by subsequent sends.
+    const auto latch_method = [this](const Xbyak::Reg32& source) { mov(METHOD_ADDRESS, source); };
+
+    switch (operation) {
+    case Macro::ResultOperation::IgnoreAndFetch:
+        // Fetch parameter and ignore result.
+        store_register(reg, Compile_FetchParameter());
+        break;
+    case Macro::ResultOperation::Move:
+        // Move result.
+        store_register(reg, RESULT);
+        break;
+    case Macro::ResultOperation::MoveAndSetMethod:
+        // Move result and use as Method Address.
+        store_register(reg, RESULT);
+        latch_method(RESULT);
+        break;
+    case Macro::ResultOperation::FetchAndSend:
+        // Fetch parameter and send result.
+        store_register(reg, Compile_FetchParameter());
+        Compile_Send(RESULT);
+        break;
+    case Macro::ResultOperation::MoveAndSend:
+        // Move and send result.
+        store_register(reg, RESULT);
+        Compile_Send(RESULT);
+        break;
+    case Macro::ResultOperation::FetchAndSetMethod:
+        // Fetch parameter and use result as Method Address.
+        store_register(reg, Compile_FetchParameter());
+        latch_method(RESULT);
+        break;
+    case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
+        // Move result and use as Method Address, then fetch and send parameter.
+        store_register(reg, RESULT);
+        latch_method(RESULT);
+        Compile_Send(Compile_FetchParameter());
+        break;
+    case Macro::ResultOperation::MoveAndSetMethodSend:
+        // Move result and use as Method Address, then send bits 12:17 of result.
+        store_register(reg, RESULT);
+        latch_method(RESULT);
+        shr(RESULT, 12);
+        and_(RESULT, 0b111111);
+        Compile_Send(RESULT);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
+    }
+}
+
+/// Decodes the macro word at the current `pc` (an index into `code`, not a byte offset).
+Macro::Opcode MacroJITx64Impl::GetOpCode() const {
+    ASSERT(pc < code.size());
+    Macro::Opcode opcode{};
+    opcode.raw = code[pc];
+    return opcode;
+}
+
+/// Returns the persistent JIT registers that are also in the ABI's caller-saved set, i.e. the
+/// ones that must be preserved around calls into host functions.
+std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
+    std::bitset<32> regs = PERSISTENT_REGISTERS;
+    regs &= Common::X64::ABI_ALL_CALLER_SAVED;
+    return regs;
+}
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
new file mode 100644
index 000000000..a180e7428
--- /dev/null
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -0,0 +1,98 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <bitset>
+#include <xbyak.h>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "common/x64/xbyak_abi.h"
+#include "video_core/macro/macro.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games; it sizes the label arrays of MacroJITx64Impl
+constexpr size_t MAX_CODE_SIZE = 0x10000;
+
+class MacroJITx64 final : public MacroEngine { // MacroEngine whose Compile override returns CachedMacro objects
+public:
+ explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
+
+protected:
+ std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; ///< Builds a cached macro from the given macro code words
+
+private:
+ Engines::Maxwell3D& maxwell3d; ///< Maxwell3D engine reference; presumably bound from the constructor argument
+};
+
+class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { // CachedMacro that is itself an Xbyak code generator
+public:
+ MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
+ ~MacroJITx64Impl();
+
+ void Execute(const std::vector<u32>& parameters, u32 method) override; ///< CachedMacro entry point; receives the call parameters and a method value
+
+ void Compile_ALU(Macro::Opcode opcode); // NOTE(review): the Compile_* members below are public; confirm external callers need them
+ void Compile_AddImmediate(Macro::Opcode opcode);
+ void Compile_ExtractInsert(Macro::Opcode opcode);
+ void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
+ void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
+ void Compile_Read(Macro::Opcode opcode);
+ void Compile_Branch(Macro::Opcode opcode);
+
+private:
+ void Optimizer_ScanFlags();
+
+ void Compile();
+ bool Compile_NextInstruction();
+
+ Xbyak::Reg32 Compile_FetchParameter();
+ Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
+
+ void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
+ void Compile_Send(Xbyak::Reg32 value);
+
+ Macro::Opcode GetOpCode() const; ///< Opcode at the current pc
+ std::bitset<32> PersistentCallerSavedRegs() const; ///< Persistent registers that are also caller-saved
+
+ struct JITState { // State block passed by pointer as the first ProgramType argument
+ Engines::Maxwell3D* maxwell3d{};
+ std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; ///< One u32 slot per macro register
+ u32 carry_flag{};
+ };
+ static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); // Keeps maxwell3d as the first member (offset 0)
+ using ProgramType = void (*)(JITState*, const u32*); ///< Signature of the program pointer; second argument is presumably the parameter words — confirm
+
+ struct OptimizerState { // Boolean switches, all value-initialized to false
+ bool can_skip_carry{};
+ bool has_delayed_pc{};
+ bool zero_reg_skip{};
+ bool skip_dummy_addimmediate{};
+ bool optimize_for_method_move{};
+ bool enable_asserts{};
+ };
+ OptimizerState optimizer{};
+
+ std::optional<Macro::Opcode> next_opcode{}; // NOTE(review): <optional> is not included directly by this header
+ ProgramType program{nullptr};
+
+ std::array<Xbyak::Label, MAX_CODE_SIZE> labels; ///< MAX_CODE_SIZE label slots; presumably indexed by instruction pc — confirm
+ std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; ///< Second MAX_CODE_SIZE-entry label table stored in every instance
+ Xbyak::Label end_of_code{};
+
+ bool is_delay_slot{};
+ u32 pc{}; ///< Index into code read by GetOpCode
+ std::optional<u32> delayed_pc;
+
+ const std::vector<u32>& code; // NOTE(review): reference member — the vector given to the constructor must outlive this object; confirm at call sites
+ Engines::Maxwell3D& maxwell3d;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
return range == inner_size;
}
-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
+ const std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
auto& memory = system.Memory();
@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
}
}
-void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
const std::size_t size) const {
std::size_t remaining_size{size};
- std::size_t page_index{src_addr >> page_bits};
- std::size_t page_offset{src_addr & page_mask};
+ std::size_t page_index{gpu_src_addr >> page_bits};
+ std::size_t page_offset{gpu_src_addr & page_mask};
auto& memory = system.Memory();
@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
}
}
-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
+ const std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
auto& memory = system.Memory();
@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
}
}
-void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
+void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
const std::size_t size) {
std::size_t remaining_size{size};
- std::size_t page_index{dest_addr >> page_bits};
- std::size_t page_offset{dest_addr & page_mask};
+ std::size_t page_index{gpu_dest_addr >> page_bits};
+ std::size_t page_offset{gpu_dest_addr & page_mask};
auto& memory = system.Memory();
@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
}
}
-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+ const std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlock(src_addr, tmp_buffer.data(), size);
- WriteBlock(dest_addr, tmp_buffer.data(), size);
+ ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
}
-void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+ const std::size_t size) {
std::vector<u8> tmp_buffer(size);
- ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
- WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+ ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
+ WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
}
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
* in the Host Memory counterpart. Note: These functions cause Host GPU Memory
* Flushes and Invalidations, respectively to each operation.
*/
- void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
* WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
* being flushed.
*/
- void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
- void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
- void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+ void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+ void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+ void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 6d522c318..836b25c1d 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -83,6 +83,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::RGBA8_SRGB>,
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
+ MortonCopy<true, PixelFormat::RG8UI>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::RGBX16F>,
MortonCopy<true, PixelFormat::R32UI>,
@@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::RGBA8_SRGB>,
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
+ MortonCopy<false, PixelFormat::RG8UI>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::RGBX16F>,
MortonCopy<false, PixelFormat::R32UI>,
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 2f75f8801..e12dab899 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -220,8 +220,8 @@ private:
return cache_begin < addr_end && addr_begin < cache_end;
};
- const u64 page_end = addr_end >> PAGE_SHIFT;
- for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
const auto& it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
continue;
@@ -242,14 +242,14 @@ private:
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
- const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(VAddr addr) {
- const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
+ const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
@@ -268,7 +268,7 @@ private:
}
static constexpr std::uintptr_t PAGE_SIZE = 4096;
- static constexpr unsigned PAGE_SHIFT = 12;
+ static constexpr unsigned PAGE_BITS = 12;
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "video_core/rasterizer_cache.h"
-
-RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 22987751e..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <mutex>
-#include <set>
-#include <unordered_map>
-
-#include <boost/icl/interval_map.hpp>
-#include <boost/range/iterator_range_core.hpp>
-
-#include "common/common_types.h"
-#include "core/settings.h"
-#include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
-
-class RasterizerCacheObject {
-public:
- explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
-
- virtual ~RasterizerCacheObject();
-
- VAddr GetCpuAddr() const {
- return cpu_addr;
- }
-
- /// Gets the size of the shader in guest memory, required for cache management
- virtual std::size_t GetSizeInBytes() const = 0;
-
- /// Sets whether the cached object should be considered registered
- void SetIsRegistered(bool registered) {
- is_registered = registered;
- }
-
- /// Returns true if the cached object is registered
- bool IsRegistered() const {
- return is_registered;
- }
-
- /// Returns true if the cached object is dirty
- bool IsDirty() const {
- return is_dirty;
- }
-
- /// Returns ticks from when this cached object was last modified
- u64 GetLastModifiedTicks() const {
- return last_modified_ticks;
- }
-
- /// Marks an object as recently modified, used to specify whether it is clean or dirty
- template <class T>
- void MarkAsModified(bool dirty, T& cache) {
- is_dirty = dirty;
- last_modified_ticks = cache.GetModifiedTicks();
- }
-
-private:
- bool is_registered{}; ///< Whether the object is currently registered with the cache
- bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
- u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
- VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
-};
-
-template <class T>
-class RasterizerCache : NonCopyable {
- friend class RasterizerCacheObject;
-
-public:
- explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
-
- /// Write any cached resources overlapping the specified region back to memory
- void FlushRegion(VAddr addr, std::size_t size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- FlushObject(object);
- }
- }
-
- /// Mark the specified region as being invalidated
- void InvalidateRegion(VAddr addr, u64 size) {
- std::lock_guard lock{mutex};
-
- const auto& objects{GetSortedObjectsFromRegion(addr, size)};
- for (auto& object : objects) {
- if (!object->IsRegistered()) {
- // Skip duplicates
- continue;
- }
- Unregister(object);
- }
- }
-
- /// Invalidates everything in the cache
- void InvalidateAll() {
- std::lock_guard lock{mutex};
-
- while (interval_cache.begin() != interval_cache.end()) {
- Unregister(*interval_cache.begin()->second.begin());
- }
- }
-
-protected:
- /// Tries to get an object from the cache with the specified cache address
- T TryGet(VAddr addr) const {
- const auto iter = map_cache.find(addr);
- if (iter != map_cache.end())
- return iter->second;
- return nullptr;
- }
-
- /// Register an object into the cache
- virtual void Register(const T& object) {
- std::lock_guard lock{mutex};
-
- object->SetIsRegistered(true);
- interval_cache.add({GetInterval(object), ObjectSet{object}});
- map_cache.insert({object->GetCpuAddr(), object});
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
- }
-
- /// Unregisters an object from the cache
- virtual void Unregister(const T& object) {
- std::lock_guard lock{mutex};
-
- object->SetIsRegistered(false);
- rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
- const VAddr addr = object->GetCpuAddr();
- interval_cache.subtract({GetInterval(object), ObjectSet{object}});
- map_cache.erase(addr);
- }
-
- /// Returns a ticks counter used for tracking when cached objects were last modified
- u64 GetModifiedTicks() {
- std::lock_guard lock{mutex};
-
- return ++modified_ticks;
- }
-
- virtual void FlushObjectInner(const T& object) = 0;
-
- /// Flushes the specified object, updating appropriate cache state as needed
- void FlushObject(const T& object) {
- std::lock_guard lock{mutex};
-
- if (!object->IsDirty()) {
- return;
- }
- FlushObjectInner(object);
- object->MarkAsModified(false, *this);
- }
-
- std::recursive_mutex mutex;
-
-private:
- /// Returns a list of cached objects from the specified memory region, ordered by access time
- std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
- if (size == 0) {
- return {};
- }
-
- std::vector<T> objects;
- const ObjectInterval interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
- for (auto& cached_object : pair.second) {
- if (!cached_object) {
- continue;
- }
- objects.push_back(cached_object);
- }
- }
-
- std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
- return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
- });
-
- return objects;
- }
-
- using ObjectSet = std::set<T>;
- using ObjectCache = std::unordered_map<VAddr, T>;
- using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
- using ObjectInterval = typename IntervalCache::interval_type;
-
- static auto GetInterval(const T& object) {
- return ObjectInterval::right_open(object->GetCpuAddr(),
- object->GetCpuAddr() + object->GetSizeInBytes());
- }
-
- ObjectCache map_cache;
- IntervalCache interval_cache; ///< Cache of objects
- u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
- VideoCore::RasterizerInterface& rasterizer;
-};
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..eb5158407
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2073 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <variant>
+
+#include <fmt/format.h>
+
+#include "common/alignment.h"
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/shader_ir.h"
+
+// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
+// GLASM lacks booleans, so they have to be implemented as integers.
+// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
+// select between two values, because -1 will be evaluated as true and 0 as false.
+
+namespace OpenGL {
+
+namespace {
+
+using Tegra::Engines::ShaderType;
+using Tegra::Shader::Attribute;
+using Tegra::Shader::PixelImap;
+using Tegra::Shader::Register;
+using namespace VideoCommon::Shader;
+using Operation = const OperationNode&;
+
+constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; ///< Names of the four internal flags; presumably indexed by the internal flag enum — confirm
+
+/// Translates a vector component index (0 to 3) into its swizzle letter.
+char Swizzle(std::size_t component) {
+    ASSERT(component < 4);
+    constexpr char letters[] = "xyzw";
+    return letters[component];
+}
+
+/// Tells whether an attribute index lies in the inclusive range Attribute_0..Attribute_31.
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+    if (index < Attribute::Index::Attribute_0) {
+        return false;
+    }
+    return !(index > Attribute::Index::Attribute_31);
+}
+
+/// Converts a generic attribute index into its zero-based position relative to Attribute_0.
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+    ASSERT(IsGenericAttribute(index));
+    constexpr auto first_generic = static_cast<u32>(Attribute::Index::Attribute_0);
+    const auto raw_index = static_cast<u32>(index);
+    return raw_index - first_generic;
+}
+
+/// Yields the ".PREC" suffix when the operation carries arithmetic metadata flagged as precise,
+/// and an empty string otherwise.
+std::string_view Modifiers(Operation operation) {
+    const auto* const arithmetic = std::get_if<MetaArithmetic>(&operation.GetMeta());
+    const bool is_precise = arithmetic != nullptr && arithmetic->precise;
+    return is_precise ? ".PREC" : "";
+}
+
+/// Returns the interpolation qualifier text (including its trailing space) for a pixel input map.
+std::string_view GetInputFlags(PixelImap attribute) {
+    if (attribute == PixelImap::Perspective) {
+        return "";
+    }
+    if (attribute == PixelImap::Constant) {
+        return "FLAT ";
+    }
+    if (attribute == PixelImap::ScreenLinear) {
+        return "NOPERSPECTIVE ";
+    }
+    // PixelImap::Unused and any other value are reported and produce an empty view.
+    UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
+    return {};
+}
+
+/// Returns the image target keyword that corresponds to a shader image type.
+std::string_view ImageType(Tegra::Shader::ImageType image_type) {
+    using Kind = Tegra::Shader::ImageType;
+    std::string_view keyword;
+    switch (image_type) {
+    case Kind::Texture1D:
+        keyword = "1D";
+        break;
+    case Kind::TextureBuffer:
+        keyword = "BUFFER";
+        break;
+    case Kind::Texture1DArray:
+        keyword = "ARRAY1D";
+        break;
+    case Kind::Texture2D:
+        keyword = "2D";
+        break;
+    case Kind::Texture2DArray:
+        keyword = "ARRAY2D";
+        break;
+    case Kind::Texture3D:
+        keyword = "3D";
+        break;
+    }
+    if (!keyword.empty()) {
+        return keyword;
+    }
+    // No known image type matched.
+    UNREACHABLE();
+    return {};
+}
+
+/// Returns the textual name of a flow stack class ("SSY" or "PBK").
+///
+/// Any value outside the handled enumerators triggers UNREACHABLE() and yields an empty string.
+std::string_view StackName(MetaStackClass stack) {
+    switch (stack) {
+    case MetaStackClass::Ssy:
+        return "SSY";
+    case MetaStackClass::Pbk:
+        return "PBK";
+    }
+    UNREACHABLE();
+    return "";
+}
+
+/// Maps a Maxwell3D primitive topology onto the input primitive keyword of its family.
+/// Unhandled topologies are reported and fall back to "POINTS".
+std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
+    using Topology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology;
+    switch (topology) {
+    case Topology::Points:
+        return "POINTS";
+    case Topology::Lines:
+    case Topology::LineStrip:
+        return "LINES";
+    case Topology::LinesAdjacency:
+    case Topology::LineStripAdjacency:
+        return "LINES_ADJACENCY";
+    case Topology::Triangles:
+    case Topology::TriangleStrip:
+    case Topology::TriangleFan:
+        return "TRIANGLES";
+    case Topology::TrianglesAdjacency:
+    case Topology::TriangleStripAdjacency:
+        return "TRIANGLES_ADJACENCY";
+    default:
+        break;
+    }
+    UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+    return "POINTS";
+}
+
+/// Returns the output primitive keyword for a shader output topology.
+///
+/// Unknown topologies are reported through UNIMPLEMENTED_MSG and fall back to "POINTS". The
+/// fallback is spelled in upper case like every other keyword returned here (and like the
+/// fallback of PrimitiveDescription), so the caller never receives a differently-cased token.
+std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
+    switch (topology) {
+    case Tegra::Shader::OutputTopology::PointList:
+        return "POINTS";
+    case Tegra::Shader::OutputTopology::LineStrip:
+        return "LINE_STRIP";
+    case Tegra::Shader::OutputTopology::TriangleStrip:
+        return "TRIANGLE_STRIP";
+    default:
+        UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
+        return "POINTS";
+    }
+}
+
+/// Returns the name of the built-in input binding used by a shader stage.
+/// Vertex and geometry stages share "vertex"; unknown stages hit UNREACHABLE().
+std::string_view StageInputName(ShaderType stage) {
+    if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
+        return "vertex";
+    }
+    if (stage == ShaderType::Fragment) {
+        return "fragment";
+    }
+    if (stage == ShaderType::Compute) {
+        return "invocation";
+    }
+    UNREACHABLE();
+    return "";
+}
+
+/// Builds the texture target string for a sampler: "BUFFER" for buffer samplers, otherwise
+/// the optional "SHADOW" and "ARRAY" prefixes followed by the dimension keyword.
+std::string TextureType(const MetaTexture& meta) {
+    const auto& sampler = meta.sampler;
+    if (sampler.is_buffer) {
+        return "BUFFER";
+    }
+
+    std::string_view dimension;
+    switch (sampler.type) {
+    case Tegra::Shader::TextureType::Texture1D:
+        dimension = "1D";
+        break;
+    case Tegra::Shader::TextureType::Texture2D:
+        dimension = "2D";
+        break;
+    case Tegra::Shader::TextureType::Texture3D:
+        dimension = "3D";
+        break;
+    case Tegra::Shader::TextureType::TextureCube:
+        dimension = "CUBE";
+        break;
+    }
+    if (dimension.empty()) {
+        // No known texture type matched; report it and fall back to 2D.
+        UNREACHABLE();
+        dimension = "2D";
+    }
+
+    std::string type;
+    if (sampler.is_shadow) {
+        type.append("SHADOW");
+    }
+    if (sampler.is_array) {
+        type.append("ARRAY");
+    }
+    type.append(dimension);
+    return type;
+}
+
+/// Composes the identifier "gmem<cbuf_index>_<cbuf_offset>" for a global memory descriptor.
+std::string GlobalMemoryName(const GlobalMemoryBase& base) {
+    const auto buffer_index = base.cbuf_index;
+    const auto buffer_offset = base.cbuf_offset;
+    return fmt::format("gmem{}_{}", buffer_index, buffer_offset);
+}
+
+class ARBDecompiler final {
+public:
+ explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier);
+
+ /// Returns a copy of the shader source text accumulated so far.
+ std::string Code() const {
+     std::string source_copy = shader_source;
+     return source_copy;
+ }
+
+private:
+ void DeclareHeader();
+ void DeclareVertex();
+ void DeclareGeometry();
+ void DeclareFragment();
+ void DeclareCompute();
+ void DeclareInputAttributes();
+ void DeclareOutputAttributes();
+ void DeclareLocalMemory();
+ void DeclareGlobalMemory();
+ void DeclareConstantBuffers();
+ void DeclareRegisters();
+ void DeclareTemporaries();
+ void DeclarePredicates();
+ void DeclareInternalFlags();
+
+ void InitializeVariables();
+
+ void DecompileAST();
+ void DecompileBranchMode();
+
+ void VisitAST(const ASTNode& node);
+ std::string VisitExpression(const Expr& node);
+
+ void VisitBlock(const NodeBlock& bb);
+
+ std::string Visit(const Node& node);
+
+ std::pair<std::string, std::size_t> BuildCoords(Operation);
+ std::string BuildAoffi(Operation);
+ void Exit();
+
+ std::string Assign(Operation);
+ std::string Select(Operation);
+ std::string FClamp(Operation);
+ std::string FCastHalf0(Operation);
+ std::string FCastHalf1(Operation);
+ std::string FSqrt(Operation);
+ std::string FSwizzleAdd(Operation);
+ std::string HAdd2(Operation);
+ std::string HMul2(Operation);
+ std::string HFma2(Operation);
+ std::string HAbsolute(Operation);
+ std::string HNegate(Operation);
+ std::string HClamp(Operation);
+ std::string HCastFloat(Operation);
+ std::string HUnpack(Operation);
+ std::string HMergeF32(Operation);
+ std::string HMergeH0(Operation);
+ std::string HMergeH1(Operation);
+ std::string HPack2(Operation);
+ std::string LogicalAssign(Operation);
+ std::string LogicalPick2(Operation);
+ std::string LogicalAnd2(Operation);
+ std::string FloatOrdered(Operation);
+ std::string FloatUnordered(Operation);
+ std::string LogicalAddCarry(Operation);
+ std::string Texture(Operation);
+ std::string TextureGather(Operation);
+ std::string TextureQueryDimensions(Operation);
+ std::string TextureQueryLod(Operation);
+ std::string TexelFetch(Operation);
+ std::string TextureGradient(Operation);
+ std::string ImageLoad(Operation);
+ std::string ImageStore(Operation);
+ std::string Branch(Operation);
+ std::string BranchIndirect(Operation);
+ std::string PushFlowStack(Operation);
+ std::string PopFlowStack(Operation);
+ std::string Exit(Operation);
+ std::string Discard(Operation);
+ std::string EmitVertex(Operation);
+ std::string EndPrimitive(Operation);
+ std::string InvocationId(Operation);
+ std::string YNegate(Operation);
+ std::string ThreadId(Operation);
+ std::string ShuffleIndexed(Operation);
+ std::string Barrier(Operation);
+ std::string MemoryBarrierGroup(Operation);
+ std::string MemoryBarrierGlobal(Operation);
+
+ /// Emits the one-operand instruction `op` (plus any modifier suffix) into a freshly
+ /// allocated scalar temporary and returns that temporary's name.
+ template <const std::string_view& op>
+ std::string Unary(Operation operation) {
+     // The destination is allocated before the operand is visited.
+     std::string result = AllocTemporary();
+     const std::string operand = Visit(operation[0]);
+     AddLine("{}{} {}, {};", op, Modifiers(operation), result, operand);
+     return result;
+ }
+
+ /// Emits the two-operand instruction `op` (plus any modifier suffix) into a freshly
+ /// allocated scalar temporary and returns that temporary's name.
+ ///
+ /// The operands are visited in separate statements, first operand first. When both Visit()
+ /// calls are written as arguments of a single call their relative order is unspecified, so
+ /// anything Visit() emits or allocates could come out in a compiler-dependent order.
+ template <const std::string_view& op>
+ std::string Binary(Operation operation) {
+     std::string temporary = AllocTemporary();
+     const std::string lhs = Visit(operation[0]);
+     const std::string rhs = Visit(operation[1]);
+     AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, lhs, rhs);
+     return temporary;
+ }
+
+ /// Emits the three-operand instruction `op` (plus any modifier suffix) into a freshly
+ /// allocated scalar temporary and returns that temporary's name.
+ ///
+ /// The operands are visited in separate statements, in operand order. When several Visit()
+ /// calls are written as arguments of a single call their relative order is unspecified, so
+ /// anything Visit() emits or allocates could come out in a compiler-dependent order.
+ template <const std::string_view& op>
+ std::string Trinary(Operation operation) {
+     std::string temporary = AllocTemporary();
+     const std::string first = Visit(operation[0]);
+     const std::string second = Visit(operation[1]);
+     const std::string third = Visit(operation[2]);
+     AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, first, second, third);
+     return temporary;
+ }
+
+ template <const std::string_view& op, bool unordered>
+ std::string FloatComparison(Operation operation) { // Emits comparison `op` and returns a temporary holding -1 (true) or 0 (false)
+ std::string temporary = AllocTemporary(); // Scalar temporary that receives the predicate
+ AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); // Emit the comparison via Binary<op>, then truncate its result into RC.x
+ AddLine("MOV.S {}, 0;", temporary); // Default the predicate to false (0)
+ AddLine("MOV.S {} (NE.x), -1;", temporary); // Conditional move: overwrite with true (-1) under the NE.x condition
+
+ const std::string op_a = Visit(operation[0]); // NOTE(review): both operands were already visited inside Binary<op> above; they are visited again here
+ const std::string op_b = Visit(operation[1]);
+ if constexpr (unordered) { // Unordered variant: each self-comparison below can additionally force the predicate to true
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a); // First operand compared against itself — presumably a NaN test
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b); // Same check for the second operand
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), -1;", temporary);
+ } else if (op == SNE_F) { // Ordered SNE_F only: each self-comparison below can force the predicate back to false
+ AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
+ AddLine("TRUNC.U.CC RC.x, RC.x;");
+ AddLine("MOV.S {} (NE.x), 0;", temporary);
+ }
+ return temporary;
+ }
+
+ template <const std::string_view& op, bool is_nan>
+ std::string HalfComparison(Operation operation) { // Emits a two-component comparison `op`; returns a vector temporary whose .x/.y hold -1 or 0
+ std::string tmp1 = AllocVectorTemporary(); // Holds the first operand's UP2H.F result, then the comparison result
+ const std::string tmp2 = AllocVectorTemporary(); // Holds the second operand's UP2H.F result
+ const std::string op_a = Visit(operation[0]);
+ const std::string op_b = Visit(operation[1]);
+ AddLine("UP2H.F {}, {};", tmp1, op_a); // Presumably unpacks the packed half pair into float components — confirm against the GLASM spec
+ AddLine("UP2H.F {}, {};", tmp2, op_b);
+ AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); // Component-wise comparison, result overwrites tmp1
+ AddLine("TRUNC.U.CC RC.xy, {};", tmp1); // Truncate both components into RC.xy
+ AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); // Default both predicates to false (0); the doubled braces are fmt escapes
+ AddLine("MOV.S {}.x (NE.x), -1;", tmp1); // Set each component to true (-1) under its own NE condition
+ AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
+ if constexpr (is_nan) { // NaN-aware variant: the (NAN.x) conditional moves below can additionally force true
+ AddLine("MOVC.F RC.x, {};", op_a);
+ AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); // NOTE(review): only .x of the still-packed first operand is tested, and only result .x is updated — confirm intent
+ AddLine("MOVC.F RC.x, {};", op_b);
+ AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); // NOTE(review): second operand's .x drives result .y — confirm intent
+ }
+ return tmp1;
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string AtomicImage(Operation operation) { // Emits an ATOMIM.<op>.<type> image atomic and returns the ".x" component of its destination
+ const auto& meta = std::get<MetaImage>(operation.GetMeta()); // Requires MetaImage metadata; std::get throws on any other alternative
+ const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; // Stage base image binding plus this image's index
+ const std::size_t num_coords = operation.GetOperandsCount(); // Every operand is treated as a coordinate
+ const std::size_t num_values = meta.values.size();
+
+ const std::string coord = AllocVectorTemporary();
+ const std::string value = AllocVectorTemporary();
+ for (std::size_t i = 0; i < num_coords; ++i) { // Pack coordinates into coord.x, .y, ... (Swizzle asserts i < 4)
+ AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
+ }
+ for (std::size_t i = 0; i < num_values; ++i) { // Pack the values from the metadata into value.x, .y, ...
+ AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
+ }
+
+ AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
+ image_id, ImageType(meta.image.type)); // coord.x is also the destination, so the coordinate temporary is overwritten
+ return fmt::format("{}.x", coord);
+ }
+
+ template <const std::string_view& op, const std::string_view& type>
+ std::string Atomic(Operation operation) { // Emits an atomic on global (ATOMB) or shared (ATOMS) memory and returns the result temporary
+ std::string temporary = AllocTemporary(); // Used first as the global-memory index, then as the destination
+ std::string address;
+ std::string_view opname;
+ if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { // Global memory target
+ AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
+ Visit(gmem->GetBaseAddress())); // Index = real address - base address. NOTE(review): the two Visit() calls are arguments of one call, so their order is unspecified
+ address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
+ opname = "ATOMB";
+ } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { // Shared memory target
+ address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
+ opname = "ATOMS";
+ } else {
+ UNREACHABLE(); // Operand 0 is neither a GmemNode nor an SmemNode
+ return "{0, 0, 0, 0}"; // Placeholder constant vector returned instead of a temporary
+ }
+ AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); // Operand 1 is the value operand of the atomic
+ return temporary;
+ }
+
+ /// Emits a MOV that stores the negated operand in a fresh scalar temporary.
+ /// The 'F' type uses the ".F32" suffix; any other type letter is used verbatim.
+ template <char type>
+ std::string Negate(Operation operation) {
+     std::string result = AllocTemporary();
+     const std::string operand = Visit(operation[0]);
+     if constexpr (type != 'F') {
+         AddLine("MOV.{} {}, -{};", type, result, operand);
+     } else {
+         AddLine("MOV.F32 {}, -{};", result, operand);
+     }
+     return result;
+ }
+
+ /// Emits a MOV that stores the absolute value (|operand|) in a fresh scalar temporary.
+ template <char type>
+ std::string Absolute(Operation operation) {
+     std::string result = AllocTemporary();
+     const std::string operand = Visit(operation[0]);
+     AddLine("MOV.{} {}, |{}|;", type, result, operand);
+     return result;
+ }
+
+ template <char type>
+ std::string BitfieldInsert(Operation operation) { // Emits a BFI and returns the ".x" component of the vector temporary holding the result
+ const std::string temporary = AllocVectorTemporary();
+ AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); // Operand 3 goes to .x, operand 2 to .y of the first BFI source
+ AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
+ AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
+ Visit(operation[0])); // NOTE(review): both Visit() calls are arguments of one call, so their order is unspecified
+ return fmt::format("{}.x", temporary);
+ }
+
+ /// Emits a BFE: operands 2 and 1 are packed into .x and .y of a vector temporary, which is
+ /// then used together with operand 0. Returns the ".x" component holding the result.
+ template <char type>
+ std::string BitfieldExtract(Operation operation) {
+     const std::string packed = AllocVectorTemporary();
+
+     const std::string x_source = Visit(operation[2]);
+     AddLine("MOV.{} {}.x, {};", type, packed, x_source);
+
+     const std::string y_source = Visit(operation[1]);
+     AddLine("MOV.{} {}.y, {};", type, packed, y_source);
+
+     const std::string base = Visit(operation[0]);
+     AddLine("BFE.{} {}.x, {}, {};", type, packed, packed, base);
+
+     return fmt::format("{}.x", packed);
+ }
+
+ /// Names the requested component of "invocation.localid"; the operation itself is unused.
+ template <char swizzle>
+ std::string LocalInvocationId(Operation) {
+     std::string name = fmt::format("invocation.localid.{}", swizzle);
+     return name;
+ }
+
+ /// Names the requested component of "invocation.groupid"; the operation itself is unused.
+ template <char swizzle>
+ std::string WorkGroupId(Operation) {
+     std::string name = fmt::format("invocation.groupid.{}", swizzle);
+     return name;
+ }
+
+ /// Names the "<stage input>.thread<c1><c2>mask" binding of the current shader stage.
+ template <char c1, char c2>
+ std::string ThreadMask(Operation) {
+     const std::string_view input_name = StageInputName(stage);
+     return fmt::format("{}.thread{}{}mask", input_name, c1, c2);
+ }
+
+ /// Formats `text` with the given arguments and appends the result to the shader source.
+ template <typename... Args>
+ void AddExpression(std::string_view text, Args&&... args) {
+     const std::string formatted = fmt::format(text, std::forward<Args>(args)...);
+     shader_source.append(formatted);
+ }
+
+ /// Appends a formatted expression to the shader source and terminates it with a newline.
+ template <typename... Args>
+ void AddLine(std::string_view text, Args&&... args) {
+     AddExpression(text, std::forward<Args>(args)...);
+     shader_source.push_back('\n');
+ }
+
+    // Reserves the next temporary and returns its .x component ("T<n>.x").
+    std::string AllocTemporary() {
+        return AllocVectorTemporary() + ".x";
+    }
+
+    // Reserves the next temporary and returns its full name ("T<n>"). Also raises
+    // max_temporaries when more temporaries are live than ever before.
+    std::string AllocVectorTemporary() {
+        const std::size_t index = num_temporaries++;
+        if (num_temporaries > max_temporaries) {
+            max_temporaries = num_temporaries;
+        }
+        return fmt::format("T{}", index);
+    }
+
+    // Makes every temporary available again; the next allocation starts back at T0.
+    // max_temporaries is left untouched.
+    void ResetTemporaries() noexcept {
+        num_temporaries = std::size_t{0};
+    }
+
+ const Device& device; // Bound by the constructor; queried for optional extension support.
+ const ShaderIR& ir; // Bound by the constructor; the program being decompiled.
+ const Registry& registry; // Bound by the constructor; source of graphics/compute info.
+ const ShaderType stage; // Shader stage this program is generated for.
+
+ std::size_t num_temporaries = 0; // Next free temporary index; zeroed by ResetTemporaries().
+ std::size_t max_temporaries = 0; // Highest number of temporaries in use at once; never reset.
+
+ std::string shader_source; // Generated program text, appended to by AddExpression/AddLine.
+
+ static constexpr std::string_view ADD_F32 = "ADD.F32"; // Instruction mnemonics from here on; used as template arguments in OPERATION_DECOMPILERS.
+ static constexpr std::string_view ADD_S = "ADD.S";
+ static constexpr std::string_view ADD_U = "ADD.U";
+ static constexpr std::string_view MUL_F32 = "MUL.F32";
+ static constexpr std::string_view MUL_S = "MUL.S";
+ static constexpr std::string_view MUL_U = "MUL.U";
+ static constexpr std::string_view DIV_F32 = "DIV.F32";
+ static constexpr std::string_view DIV_S = "DIV.S";
+ static constexpr std::string_view DIV_U = "DIV.U";
+ static constexpr std::string_view MAD_F32 = "MAD.F32";
+ static constexpr std::string_view RSQ_F32 = "RSQ.F32";
+ static constexpr std::string_view COS_F32 = "COS.F32";
+ static constexpr std::string_view SIN_F32 = "SIN.F32";
+ static constexpr std::string_view EX2_F32 = "EX2.F32";
+ static constexpr std::string_view LG2_F32 = "LG2.F32";
+ static constexpr std::string_view SLT_F = "SLT.F32";
+ static constexpr std::string_view SLT_S = "SLT.S";
+ static constexpr std::string_view SLT_U = "SLT.U";
+ static constexpr std::string_view SEQ_F = "SEQ.F32";
+ static constexpr std::string_view SEQ_S = "SEQ.S";
+ static constexpr std::string_view SEQ_U = "SEQ.U";
+ static constexpr std::string_view SLE_F = "SLE.F32";
+ static constexpr std::string_view SLE_S = "SLE.S";
+ static constexpr std::string_view SLE_U = "SLE.U";
+ static constexpr std::string_view SGT_F = "SGT.F32";
+ static constexpr std::string_view SGT_S = "SGT.S";
+ static constexpr std::string_view SGT_U = "SGT.U";
+ static constexpr std::string_view SNE_F = "SNE.F32";
+ static constexpr std::string_view SNE_S = "SNE.S";
+ static constexpr std::string_view SNE_U = "SNE.U";
+ static constexpr std::string_view SGE_F = "SGE.F32";
+ static constexpr std::string_view SGE_S = "SGE.S";
+ static constexpr std::string_view SGE_U = "SGE.U";
+ static constexpr std::string_view AND_S = "AND.S";
+ static constexpr std::string_view AND_U = "AND.U";
+ static constexpr std::string_view TRUNC_F = "TRUNC.F";
+ static constexpr std::string_view TRUNC_S = "TRUNC.S";
+ static constexpr std::string_view TRUNC_U = "TRUNC.U";
+ static constexpr std::string_view SHL_S = "SHL.S";
+ static constexpr std::string_view SHL_U = "SHL.U";
+ static constexpr std::string_view SHR_S = "SHR.S";
+ static constexpr std::string_view SHR_U = "SHR.U";
+ static constexpr std::string_view OR_S = "OR.S";
+ static constexpr std::string_view OR_U = "OR.U";
+ static constexpr std::string_view XOR_S = "XOR.S";
+ static constexpr std::string_view XOR_U = "XOR.U";
+ static constexpr std::string_view NOT_S = "NOT.S";
+ static constexpr std::string_view NOT_U = "NOT.U";
+ static constexpr std::string_view BTC_S = "BTC.S";
+ static constexpr std::string_view BTC_U = "BTC.U";
+ static constexpr std::string_view BTFM_S = "BTFM.S";
+ static constexpr std::string_view BTFM_U = "BTFM.U";
+ static constexpr std::string_view ROUND_F = "ROUND.F";
+ static constexpr std::string_view CEIL_F = "CEIL.F";
+ static constexpr std::string_view FLR_F = "FLR.F";
+ static constexpr std::string_view I2F_S = "I2F.S";
+ static constexpr std::string_view I2F_U = "I2F.U";
+ static constexpr std::string_view MIN_F = "MIN.F";
+ static constexpr std::string_view MIN_S = "MIN.S";
+ static constexpr std::string_view MIN_U = "MIN.U";
+ static constexpr std::string_view MAX_F = "MAX.F";
+ static constexpr std::string_view MAX_S = "MAX.S";
+ static constexpr std::string_view MAX_U = "MAX.U";
+ static constexpr std::string_view MOV_U = "MOV.U";
+ static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
+ static constexpr std::string_view TGALL_U = "TGALL.U";
+ static constexpr std::string_view TGANY_U = "TGANY.U";
+ static constexpr std::string_view TGEQ_U = "TGEQ.U";
+ static constexpr std::string_view EXCH = "EXCH"; // From here on: atomic operation names passed to Atomic/AtomicImage.
+ static constexpr std::string_view ADD = "ADD";
+ static constexpr std::string_view MIN = "MIN";
+ static constexpr std::string_view MAX = "MAX";
+ static constexpr std::string_view AND = "AND";
+ static constexpr std::string_view OR = "OR";
+ static constexpr std::string_view XOR = "XOR";
+ static constexpr std::string_view U32 = "U32"; // Type names passed as the second argument of Atomic/AtomicImage.
+ static constexpr std::string_view S32 = "S32";
+
+ static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); // One table slot per OperationCode value.
+ using DecompilerType = std::string (ARBDecompiler::*)(Operation); // Handler: receives the operation, returns a std::string.
+ static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { // Visit() indexes this with static_cast<std::size_t>(GetCode()), so entry order must follow the OperationCode enum.
+ &ARBDecompiler::Assign,
+
+ &ARBDecompiler::Select,
+
+ &ARBDecompiler::Binary<ADD_F32>,
+ &ARBDecompiler::Binary<MUL_F32>,
+ &ARBDecompiler::Binary<DIV_F32>,
+ &ARBDecompiler::Trinary<MAD_F32>,
+ &ARBDecompiler::Negate<'F'>,
+ &ARBDecompiler::Absolute<'F'>,
+ &ARBDecompiler::FClamp,
+ &ARBDecompiler::FCastHalf0,
+ &ARBDecompiler::FCastHalf1,
+ &ARBDecompiler::Binary<MIN_F>,
+ &ARBDecompiler::Binary<MAX_F>,
+ &ARBDecompiler::Unary<COS_F32>,
+ &ARBDecompiler::Unary<SIN_F32>,
+ &ARBDecompiler::Unary<EX2_F32>,
+ &ARBDecompiler::Unary<LG2_F32>,
+ &ARBDecompiler::Unary<RSQ_F32>,
+ &ARBDecompiler::FSqrt,
+ &ARBDecompiler::Unary<ROUND_F>,
+ &ARBDecompiler::Unary<FLR_F>,
+ &ARBDecompiler::Unary<CEIL_F>,
+ &ARBDecompiler::Unary<TRUNC_F>,
+ &ARBDecompiler::Unary<I2F_S>,
+ &ARBDecompiler::Unary<I2F_U>,
+ &ARBDecompiler::FSwizzleAdd,
+
+ &ARBDecompiler::Binary<ADD_S>,
+ &ARBDecompiler::Binary<MUL_S>,
+ &ARBDecompiler::Binary<DIV_S>,
+ &ARBDecompiler::Negate<'S'>,
+ &ARBDecompiler::Absolute<'S'>,
+ &ARBDecompiler::Binary<MIN_S>,
+ &ARBDecompiler::Binary<MAX_S>,
+
+ &ARBDecompiler::Unary<TRUNC_S>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_S>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_S>,
+ &ARBDecompiler::Binary<AND_S>,
+ &ARBDecompiler::Binary<OR_S>,
+ &ARBDecompiler::Binary<XOR_S>,
+ &ARBDecompiler::Unary<NOT_S>,
+ &ARBDecompiler::BitfieldInsert<'S'>,
+ &ARBDecompiler::BitfieldExtract<'S'>,
+ &ARBDecompiler::Unary<BTC_S>,
+ &ARBDecompiler::Unary<BTFM_S>,
+
+ &ARBDecompiler::Binary<ADD_U>,
+ &ARBDecompiler::Binary<MUL_U>,
+ &ARBDecompiler::Binary<DIV_U>,
+ &ARBDecompiler::Binary<MIN_U>,
+ &ARBDecompiler::Binary<MAX_U>,
+ &ARBDecompiler::Unary<TRUNC_U>,
+ &ARBDecompiler::Unary<MOV_U>,
+ &ARBDecompiler::Binary<SHL_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<SHR_U>,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::BitfieldInsert<'U'>,
+ &ARBDecompiler::BitfieldExtract<'U'>,
+ &ARBDecompiler::Unary<BTC_U>,
+ &ARBDecompiler::Unary<BTFM_U>,
+
+ &ARBDecompiler::HAdd2,
+ &ARBDecompiler::HMul2,
+ &ARBDecompiler::HFma2,
+ &ARBDecompiler::HAbsolute,
+ &ARBDecompiler::HNegate,
+ &ARBDecompiler::HClamp,
+ &ARBDecompiler::HCastFloat,
+ &ARBDecompiler::HUnpack,
+ &ARBDecompiler::HMergeF32,
+ &ARBDecompiler::HMergeH0,
+ &ARBDecompiler::HMergeH1,
+ &ARBDecompiler::HPack2,
+
+ &ARBDecompiler::LogicalAssign,
+ &ARBDecompiler::Binary<AND_U>,
+ &ARBDecompiler::Binary<OR_U>,
+ &ARBDecompiler::Binary<XOR_U>,
+ &ARBDecompiler::Unary<NOT_U>,
+ &ARBDecompiler::LogicalPick2,
+ &ARBDecompiler::LogicalAnd2,
+
+ &ARBDecompiler::FloatComparison<SLT_F, false>,
+ &ARBDecompiler::FloatComparison<SEQ_F, false>,
+ &ARBDecompiler::FloatComparison<SLE_F, false>,
+ &ARBDecompiler::FloatComparison<SGT_F, false>,
+ &ARBDecompiler::FloatComparison<SNE_F, false>,
+ &ARBDecompiler::FloatComparison<SGE_F, false>,
+ &ARBDecompiler::FloatOrdered,
+ &ARBDecompiler::FloatUnordered,
+ &ARBDecompiler::FloatComparison<SLT_F, true>,
+ &ARBDecompiler::FloatComparison<SEQ_F, true>,
+ &ARBDecompiler::FloatComparison<SLE_F, true>,
+ &ARBDecompiler::FloatComparison<SGT_F, true>,
+ &ARBDecompiler::FloatComparison<SNE_F, true>,
+ &ARBDecompiler::FloatComparison<SGE_F, true>,
+
+ &ARBDecompiler::Binary<SLT_S>,
+ &ARBDecompiler::Binary<SEQ_S>,
+ &ARBDecompiler::Binary<SLE_S>,
+ &ARBDecompiler::Binary<SGT_S>,
+ &ARBDecompiler::Binary<SNE_S>,
+ &ARBDecompiler::Binary<SGE_S>,
+
+ &ARBDecompiler::Binary<SLT_U>,
+ &ARBDecompiler::Binary<SEQ_U>,
+ &ARBDecompiler::Binary<SLE_U>,
+ &ARBDecompiler::Binary<SGT_U>,
+ &ARBDecompiler::Binary<SNE_U>,
+ &ARBDecompiler::Binary<SGE_U>,
+
+ &ARBDecompiler::LogicalAddCarry,
+
+ &ARBDecompiler::HalfComparison<SLT_F, false>,
+ &ARBDecompiler::HalfComparison<SEQ_F, false>,
+ &ARBDecompiler::HalfComparison<SLE_F, false>,
+ &ARBDecompiler::HalfComparison<SGT_F, false>,
+ &ARBDecompiler::HalfComparison<SNE_F, false>,
+ &ARBDecompiler::HalfComparison<SGE_F, false>,
+ &ARBDecompiler::HalfComparison<SLT_F, true>,
+ &ARBDecompiler::HalfComparison<SEQ_F, true>,
+ &ARBDecompiler::HalfComparison<SLE_F, true>,
+ &ARBDecompiler::HalfComparison<SGT_F, true>,
+ &ARBDecompiler::HalfComparison<SNE_F, true>,
+ &ARBDecompiler::HalfComparison<SGE_F, true>,
+
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::Texture,
+ &ARBDecompiler::TextureGather,
+ &ARBDecompiler::TextureQueryDimensions,
+ &ARBDecompiler::TextureQueryLod,
+ &ARBDecompiler::TexelFetch,
+ &ARBDecompiler::TextureGradient,
+
+ &ARBDecompiler::ImageLoad,
+ &ARBDecompiler::ImageStore,
+
+ &ARBDecompiler::AtomicImage<ADD, U32>,
+ &ARBDecompiler::AtomicImage<AND, U32>,
+ &ARBDecompiler::AtomicImage<OR, U32>,
+ &ARBDecompiler::AtomicImage<XOR, U32>,
+ &ARBDecompiler::AtomicImage<EXCH, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, U32>,
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<EXCH, S32>,
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Atomic<ADD, U32>,
+ &ARBDecompiler::Atomic<MIN, U32>,
+ &ARBDecompiler::Atomic<MAX, U32>,
+ &ARBDecompiler::Atomic<AND, U32>,
+ &ARBDecompiler::Atomic<OR, U32>,
+ &ARBDecompiler::Atomic<XOR, U32>,
+
+ &ARBDecompiler::Atomic<ADD, S32>,
+ &ARBDecompiler::Atomic<MIN, S32>,
+ &ARBDecompiler::Atomic<MAX, S32>,
+ &ARBDecompiler::Atomic<AND, S32>,
+ &ARBDecompiler::Atomic<OR, S32>,
+ &ARBDecompiler::Atomic<XOR, S32>,
+
+ &ARBDecompiler::Branch,
+ &ARBDecompiler::BranchIndirect,
+ &ARBDecompiler::PushFlowStack,
+ &ARBDecompiler::PopFlowStack,
+ &ARBDecompiler::Exit,
+ &ARBDecompiler::Discard,
+
+ &ARBDecompiler::EmitVertex,
+ &ARBDecompiler::EndPrimitive,
+
+ &ARBDecompiler::InvocationId,
+ &ARBDecompiler::YNegate,
+ &ARBDecompiler::LocalInvocationId<'x'>,
+ &ARBDecompiler::LocalInvocationId<'y'>,
+ &ARBDecompiler::LocalInvocationId<'z'>,
+ &ARBDecompiler::WorkGroupId<'x'>,
+ &ARBDecompiler::WorkGroupId<'y'>,
+ &ARBDecompiler::WorkGroupId<'z'>,
+
+ &ARBDecompiler::Unary<TGBALLOT_U>,
+ &ARBDecompiler::Unary<TGALL_U>,
+ &ARBDecompiler::Unary<TGANY_U>,
+ &ARBDecompiler::Unary<TGEQ_U>,
+
+ &ARBDecompiler::ThreadId,
+ &ARBDecompiler::ThreadMask<'e', 'q'>,
+ &ARBDecompiler::ThreadMask<'g', 'e'>,
+ &ARBDecompiler::ThreadMask<'g', 't'>,
+ &ARBDecompiler::ThreadMask<'l', 'e'>,
+ &ARBDecompiler::ThreadMask<'l', 't'>,
+ &ARBDecompiler::ShuffleIndexed,
+
+ &ARBDecompiler::Barrier,
+ &ARBDecompiler::MemoryBarrierGroup,
+ &ARBDecompiler::MemoryBarrierGlobal,
+ };
+};
+
+// Decompiles `ir` for `stage` and leaves the complete program text in shader_source.
+//
+// The program body is generated first and set aside, because the declaration helpers depend on
+// state collected while generating it (DeclareTemporaries() reads max_temporaries). The
+// declarations are then written into the emptied buffer and the body is appended after them.
+// `identifier` is not used by this constructor.
+ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+                             ShaderType stage, std::string_view identifier)
+    : device{device}, ir{ir}, registry{registry}, stage{stage} {
+    AddLine("TEMP RC;");
+    AddLine("TEMP FSWZA[4];");
+    AddLine("TEMP FSWZB[4];");
+    if (ir.IsDecompiled()) {
+        DecompileAST();
+    } else {
+        DecompileBranchMode();
+    }
+    AddLine("END");
+
+    const std::string code = std::move(shader_source);
+    // A moved-from std::string is only guaranteed to be valid, not empty. Clear it explicitly
+    // so the declarations below always start from an empty buffer.
+    shader_source.clear();
+
+    DeclareHeader();
+    DeclareVertex();
+    DeclareGeometry();
+    DeclareFragment();
+    DeclareCompute();
+    DeclareInputAttributes();
+    DeclareOutputAttributes();
+    DeclareLocalMemory();
+    DeclareGlobalMemory();
+    DeclareConstantBuffers();
+    DeclareRegisters();
+    DeclareTemporaries();
+    DeclarePredicates();
+    DeclareInternalFlags();
+
+    shader_source += code;
+}
+
+// Returns the two-letter stage tag that DeclareHeader() places in the "!!NV<tag>5.0" line.
+// Any stage other than vertex, geometry, fragment or compute is reported as unreachable and
+// yields an empty string.
+std::string_view HeaderStageName(ShaderType stage) {
+    if (stage == ShaderType::Vertex) {
+        return "vp";
+    }
+    if (stage == ShaderType::Geometry) {
+        return "gp";
+    }
+    if (stage == ShaderType::Fragment) {
+        return "fp";
+    }
+    if (stage == ShaderType::Compute) {
+        return "cp";
+    }
+    UNREACHABLE();
+    return "";
+}
+
+// Writes the program header line followed by the OPTION lines. Some options are always
+// emitted; the rest depend on the stage, the IR and what the device reports.
+void ARBDecompiler::DeclareHeader() {
+    AddLine("!!NV{}5.0", HeaderStageName(stage));
+
+    static constexpr std::string_view UNCONDITIONAL_OPTIONS[] = {
+        // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
+        "OPTION NV_internal;",
+        "OPTION NV_gpu_program_fp64;",
+        "OPTION NV_shader_storage_buffer;",
+        "OPTION NV_shader_thread_group;",
+    };
+    for (const std::string_view option : UNCONDITIONAL_OPTIONS) {
+        AddLine(option);
+    }
+
+    if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
+        AddLine("OPTION NV_shader_thread_shuffle;");
+    }
+    if (stage == ShaderType::Vertex && device.HasNvViewportArray2()) {
+        AddLine("OPTION NV_viewport_array2;");
+    }
+    if (stage == ShaderType::Fragment) {
+        AddLine("OPTION ARB_draw_buffers;");
+    }
+    if (device.HasImageLoadFormatted()) {
+        AddLine("OPTION EXT_shader_image_load_formatted;");
+    }
+}
+
+// Vertex stage only: declares the result_clip output array over result.clip[0..7].
+void ARBDecompiler::DeclareVertex() {
+    if (stage == ShaderType::Vertex) {
+        AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
+    }
+}
+
+// Geometry stage only: declares the input/output primitive types, the maximum output vertex
+// count and the vertex_position input binding.
+void ARBDecompiler::DeclareGeometry() {
+    const bool is_geometry = stage == ShaderType::Geometry;
+    if (!is_geometry) {
+        return;
+    }
+
+    const auto& graphics_info = registry.GetGraphicsInfo();
+    const auto& program_header = ir.GetHeader();
+
+    AddLine("PRIMITIVE_IN {};", PrimitiveDescription(graphics_info.primitive_topology));
+    AddLine("PRIMITIVE_OUT {};", TopologyName(program_header.common3.output_topology));
+    AddLine("VERTICES_OUT {};", program_header.common4.max_output_vertices.Value());
+    AddLine("ATTRIB vertex_position = vertex.position;");
+}
+
+// Fragment stage only: declares one named output per color attachment, from result_color7
+// down to result_color0.
+void ARBDecompiler::DeclareFragment() {
+    if (stage != ShaderType::Fragment) {
+        return;
+    }
+    static constexpr std::string_view COLOR_OUTPUTS[] = {
+        "OUTPUT result_color7 = result.color[7];",
+        "OUTPUT result_color6 = result.color[6];",
+        "OUTPUT result_color5 = result.color[5];",
+        "OUTPUT result_color4 = result.color[4];",
+        "OUTPUT result_color3 = result.color[3];",
+        "OUTPUT result_color2 = result.color[2];",
+        "OUTPUT result_color1 = result.color[1];",
+        "OUTPUT result_color0 = result.color;",
+    };
+    for (const std::string_view declaration : COLOR_OUTPUTS) {
+        AddLine(declaration);
+    }
+}
+
+// Compute stage only: declares the workgroup size and, when the program uses shared memory,
+// its size in bytes together with the shared_mem binding.
+void ARBDecompiler::DeclareCompute() {
+    if (stage != ShaderType::Compute) {
+        return;
+    }
+
+    const ComputeInfo& info = registry.GetComputeInfo();
+    const auto& group = info.workgroup_size;
+    AddLine("GROUP_SIZE {} {} {};", group[0], group[1], group[2]);
+
+    const bool uses_shared_memory = info.shared_memory_size_in_words > 0;
+    if (uses_shared_memory) {
+        // The size is stored in 4-byte words; SHARED_MEMORY is emitted in bytes.
+        const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
+        AddLine("SHARED_MEMORY {};", size_in_bytes);
+        AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
+    }
+}
+
+// Declares an in_attr<N> array for every generic input attribute the IR reads. Compute
+// programs declare nothing. In fragment programs each declaration is prefixed with the flags
+// returned by GetInputFlags() for the attribute's pixel input mode, and attributes whose mode
+// is Unused are not declared.
+void ARBDecompiler::DeclareInputAttributes() {
+    if (stage == ShaderType::Compute) {
+        return;
+    }
+    const std::string_view stage_name = StageInputName(stage);
+    for (const auto attribute : ir.GetInputAttributes()) {
+        if (!IsGenericAttribute(attribute)) {
+            continue;
+        }
+        const u32 index = GetGenericAttributeIndex(attribute);
+
+        std::string_view suffix;
+        if (stage == ShaderType::Fragment) {
+            const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
+            if (input_mode == PixelImap::Unused) {
+                // Skip only this attribute. Returning here would also drop the declarations
+                // of every attribute that comes after it.
+                continue;
+            }
+            suffix = GetInputFlags(input_mode);
+        }
+        AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
+                index);
+    }
+}
+
+// Declares an out_attr<N> array for every generic output attribute the IR writes.
+// Compute programs declare nothing.
+void ARBDecompiler::DeclareOutputAttributes() {
+    if (stage == ShaderType::Compute) {
+        return;
+    }
+    for (const auto attribute : ir.GetOutputAttributes()) {
+        if (IsGenericAttribute(attribute)) {
+            const u32 location = GetGenericAttributeIndex(attribute);
+            AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", location, location,
+                    location);
+        }
+    }
+}
+
+// Declares the lmem array when the program uses local memory. The byte size comes from the
+// compute info for compute programs and from the program header otherwise; the array gets
+// one element per 4 bytes, rounded up.
+void ARBDecompiler::DeclareLocalMemory() {
+    const u64 size_in_bytes = stage == ShaderType::Compute
+                                  ? registry.GetComputeInfo().local_memory_size_in_words * 4ULL
+                                  : ir.GetHeader().GetLocalMemorySize();
+    if (size_in_bytes != 0) {
+        const u64 element_count = Common::AlignUp(size_in_bytes, 4) / 4;
+        AddLine("TEMP lmem[{}];", element_count);
+    }
+}
+
+// Declares one STORAGE binding per global memory entry of the IR, numbered consecutively
+// from zero in iteration order.
+void ARBDecompiler::DeclareGlobalMemory() {
+    u32 next_binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
+    for (const auto& entry : ir.GetGlobalMemory()) {
+        AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(entry.first),
+                next_binding);
+        next_binding += 1;
+    }
+}
+
+// Declares one CBUFFER per constant buffer of the IR. The declared name carries the buffer's
+// own key, while program.buffer[] slots are assigned consecutively from zero.
+void ARBDecompiler::DeclareConstantBuffers() {
+    u32 next_binding = 0;
+    for (const auto& entry : ir.GetConstantBuffers()) {
+        AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", entry.first, next_binding);
+        next_binding += 1;
+    }
+}
+
+// Declares a TEMP named R<index> for every register the IR reports as used.
+void ARBDecompiler::DeclareRegisters() {
+    for (const u32 register_index : ir.GetRegisters()) {
+        AddLine("TEMP R{};", register_index);
+    }
+}
+
+// Declares T0 .. T<max_temporaries - 1>, covering every temporary handed out while the
+// program body was generated.
+void ARBDecompiler::DeclareTemporaries() {
+    std::size_t index = 0;
+    while (index < max_temporaries) {
+        AddLine("TEMP T{};", index);
+        ++index;
+    }
+}
+
+// Declares a TEMP named P<index> for every predicate the IR reports as used.
+void ARBDecompiler::DeclarePredicates() {
+    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+        const auto predicate_index = static_cast<u64>(pred);
+        AddLine("TEMP P{};", predicate_index);
+    }
+}
+
+// Declares one TEMP for each name listed in INTERNAL_FLAG_NAMES.
+void ARBDecompiler::DeclareInternalFlags() {
+    for (const char* const flag_name : INTERNAL_FLAG_NAMES) {
+        AddLine("TEMP {};", flag_name);
+    }
+}
+
+// Emits the start-of-program initialization: fills the FSWZA/FSWZB tables, gives the position
+// (vertex and geometry stages) and every generic output attribute the value {0, 0, 0, 1}, and
+// zeroes every used register and predicate.
+void ARBDecompiler::InitializeVariables() {
+    static constexpr std::string_view SWIZZLE_TABLE_SETUP[] = {
+        "MOV.F32 FSWZA[0], -1;",
+        "MOV.F32 FSWZA[1], 1;",
+        "MOV.F32 FSWZA[2], -1;",
+        "MOV.F32 FSWZA[3], 0;",
+        "MOV.F32 FSWZB[0], -1;",
+        "MOV.F32 FSWZB[1], -1;",
+        "MOV.F32 FSWZB[2], 1;",
+        "MOV.F32 FSWZB[3], -1;",
+    };
+    for (const std::string_view instruction : SWIZZLE_TABLE_SETUP) {
+        AddLine(instruction);
+    }
+
+    const bool has_position_output =
+        stage == ShaderType::Vertex || stage == ShaderType::Geometry;
+    if (has_position_output) {
+        AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
+    }
+
+    for (const auto attribute : ir.GetOutputAttributes()) {
+        if (IsGenericAttribute(attribute)) {
+            const u32 location = GetGenericAttributeIndex(attribute);
+            AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", location);
+        }
+    }
+
+    for (const u32 register_index : ir.GetRegisters()) {
+        AddLine("MOV.F R{}, {{0, 0, 0, 0}};", register_index);
+    }
+
+    for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
+        const auto predicate_index = static_cast<u64>(pred);
+        AddLine("MOV.U P{}, {{0, 0, 0, 0}};", predicate_index);
+    }
+}
+
+// Generates the program body from the IR's AST: declares and zeroes one F<n> flow variable
+// per AST variable, emits the common initialization and then walks the AST program.
+void ARBDecompiler::DecompileAST() {
+    const u32 flow_variable_count = ir.GetASTNumVariables();
+
+    // All declarations are emitted before the first initialization.
+    u32 variable = 0;
+    while (variable < flow_variable_count) {
+        AddLine("TEMP F{};", variable);
+        ++variable;
+    }
+    variable = 0;
+    while (variable < flow_variable_count) {
+        AddLine("MOV.U F{}, {{0, 0, 0, 0}};", variable);
+        ++variable;
+    }
+
+    InitializeVariables();
+
+    VisitAST(ir.GetASTProgram());
+}
+
+// Generates the program body as a dispatch loop over the IR's basic blocks. PC.x holds the
+// address of the block to run; inside a REP loop, a chain of nested IF/ELSE tests selects the
+// matching block. A block that has a successor and does not end in a Branch operation stores
+// the successor's address in PC.x and restarts the loop with CONT. The innermost ELSE, reached
+// when no address matches, emits RET.
+void ARBDecompiler::DecompileBranchMode() {
+    static constexpr u32 FLOW_STACK_SIZE = 20;
+    const bool uses_flow_stack = !ir.IsFlowStackDisabled();
+
+    if (uses_flow_stack) {
+        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
+        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
+        AddLine("TEMP SSY_TOP;");
+        AddLine("TEMP PBK_TOP;");
+    }
+
+    AddLine("TEMP PC;");
+
+    if (uses_flow_stack) {
+        AddLine("MOV.U SSY_TOP.x, 0;");
+        AddLine("MOV.U PBK_TOP.x, 0;");
+    }
+
+    InitializeVariables();
+
+    const auto& blocks = ir.GetBasicBlocks();
+    const auto last = blocks.end();
+    auto current = blocks.begin();
+
+    const u32 entry_address = current->first;
+    AddLine("MOV.U PC.x, {};", entry_address);
+
+    AddLine("REP;");
+
+    // Every block opens an IF that stays open until the whole chain has been emitted.
+    std::size_t open_conditionals = 0;
+    for (; current != last; ++open_conditionals) {
+        const auto& [address, bb] = *current;
+
+        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
+        AddLine("IF NE.x;");
+
+        VisitBlock(bb);
+
+        ++current;
+
+        const bool has_successor = current != last;
+        if (has_successor) {
+            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
+            const bool ends_in_branch = op && op->GetCode() == OperationCode::Branch;
+            if (!ends_in_branch) {
+                const u32 next_address = current->first;
+                AddLine("MOV.U PC.x, {};", next_address);
+                AddLine("CONT;");
+            }
+        }
+
+        AddLine("ELSE;");
+    }
+    AddLine("RET;");
+    for (std::size_t closed = 0; closed < open_conditionals; ++closed) {
+        AddLine("ENDIF;");
+    }
+
+    AddLine("ENDREP;");
+}
+
+// Emits code for one AST node, recursing into child nodes where the node has any.
+// Temporaries used to evaluate a condition are released once the instruction consuming the
+// condition has been emitted.
+void ARBDecompiler::VisitAST(const ASTNode& node) {
+    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
+        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+            VisitAST(current);
+        }
+    } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
+        const std::string condition = VisitExpression(ast->condition);
+        ResetTemporaries();
+
+        AddLine("MOVC.U RC.x, {};", condition);
+        AddLine("IF NE.x;");
+        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+            VisitAST(current);
+        }
+        AddLine("ENDIF;");
+    } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
+        AddLine("ELSE;");
+        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+            VisitAST(current);
+        }
+    } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
+        VisitBlock(ast->nodes);
+    } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
+        AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
+        ResetTemporaries();
+    } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
+        AddLine("REP;");
+        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
+            VisitAST(current);
+        }
+        // The condition has to be evaluated at the end of every iteration. Emitting its code
+        // before REP would compute it only once, and its result temporary would be reused by
+        // the loop body after the temporaries are reset.
+        // NOTE(review): the loop is left when the condition is non-zero; confirm this matches
+        // the polarity ASTDoWhile conditions are expected to have.
+        AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+        AddLine("BRK (NE.x);");
+        ResetTemporaries();
+        AddLine("ENDREP;");
+    } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
+        const bool is_true = ExprIsTrue(ast->condition);
+        if (!is_true) {
+            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+            AddLine("IF NE.x;");
+            ResetTemporaries();
+        }
+        if (ast->kills) {
+            AddLine("KIL TR;");
+        } else {
+            Exit();
+        }
+        if (!is_true) {
+            AddLine("ENDIF;");
+        }
+    } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
+        if (ExprIsTrue(ast->condition)) {
+            AddLine("BRK;");
+        } else {
+            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
+            AddLine("BRK (NE.x);");
+            ResetTemporaries();
+        }
+    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
+        // Nothing to do
+    } else {
+        UNREACHABLE();
+    }
+}
+
+// Emits whatever code is needed to evaluate a control-flow expression and returns the operand
+// text that holds its value. And, Or, Not and GprEqual write their result to a newly allocated
+// temporary; predicates, flow variables and boolean constants are returned as direct operands.
+std::string ARBDecompiler::VisitExpression(const Expr& node) {
+    if (const auto expr = std::get_if<ExprAnd>(&*node)) {
+        std::string result = AllocTemporary();
+        // Both operand visits emit code and allocate temporaries. Sequence them explicitly:
+        // inside a single argument list their relative order would be unspecified and the
+        // generated program would differ between compilers.
+        const std::string lhs = VisitExpression(expr->operand1);
+        const std::string rhs = VisitExpression(expr->operand2);
+        AddLine("AND.U {}, {}, {};", result, lhs, rhs);
+        return result;
+    }
+    if (const auto expr = std::get_if<ExprOr>(&*node)) {
+        std::string result = AllocTemporary();
+        // Same explicit left-to-right sequencing as for ExprAnd.
+        const std::string lhs = VisitExpression(expr->operand1);
+        const std::string rhs = VisitExpression(expr->operand2);
+        AddLine("OR.U {}, {}, {};", result, lhs, rhs);
+        return result;
+    }
+    if (const auto expr = std::get_if<ExprNot>(&*node)) {
+        std::string result = AllocTemporary();
+        AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
+        return result;
+    }
+    if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
+        return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
+    }
+    if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
+        return Visit(ir.GetConditionCode(expr->cc));
+    }
+    if (const auto expr = std::get_if<ExprVar>(&*node)) {
+        return fmt::format("F{}.x", expr->var_index);
+    }
+    if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
+        return expr->value ? "0xffffffff" : "0";
+    }
+    if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
+        std::string result = AllocTemporary();
+        AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
+        return result;
+    }
+    UNREACHABLE();
+    return "0";
+}
+
+// Visits every node of the block in order; the operand text each visit returns is discarded.
+void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
+    for (const auto& statement : bb) {
+        Visit(statement);
+    }
+}
+
+// Emits the code needed to evaluate `node` and returns the operand text that refers to its
+// value. Operation nodes are dispatched through OPERATION_DECOMPILERS. Conditional and comment
+// nodes produce no value and return an empty string, as do operation codes without a handler.
+std::string ARBDecompiler::Visit(const Node& node) {
+    if (const auto operation = std::get_if<OperationNode>(&*node)) {
+        if (const auto amend_index = operation->GetAmendIndex()) {
+            Visit(ir.GetAmendNode(*amend_index));
+        }
+        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
+        if (index >= OPERATION_DECOMPILERS.size()) {
+            UNREACHABLE_MSG("Out of bounds operation: {}", index);
+            return {};
+        }
+        const auto decompiler = OPERATION_DECOMPILERS[index];
+        if (decompiler == nullptr) {
+            UNREACHABLE_MSG("Undefined operation: {}", index);
+            return {};
+        }
+        return (this->*decompiler)(*operation);
+    }
+
+    if (const auto gpr = std::get_if<GprNode>(&*node)) {
+        const u32 index = gpr->GetIndex();
+        if (index == Register::ZeroIndex) {
+            return "{0, 0, 0, 0}.x";
+        }
+        return fmt::format("R{}.x", index);
+    }
+
+    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+        return fmt::format("CV{}.x", cv->GetIndex());
+    }
+
+    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
+        std::string temporary = AllocTemporary();
+        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
+        return temporary;
+    }
+
+    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
+        std::string temporary = AllocTemporary();
+        switch (const auto index = predicate->GetIndex(); index) {
+        case Tegra::Shader::Pred::UnusedIndex:
+            AddLine("MOV.S {}, -1;", temporary);
+            break;
+        case Tegra::Shader::Pred::NeverExecute:
+            AddLine("MOV.S {}, 0;", temporary);
+            break;
+        default:
+            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
+            break;
+        }
+        if (predicate->IsNegated()) {
+            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
+        }
+        return temporary;
+    }
+
+    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
+        if (abuf->IsPhysicalBuffer()) {
+            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
+            return "{0, 0, 0, 0}.x";
+        }
+
+        const Attribute::Index index = abuf->GetIndex();
+        const u32 element = abuf->GetElement();
+        const char swizzle = Swizzle(element);
+        switch (index) {
+        case Attribute::Index::Position: {
+            if (stage == ShaderType::Geometry) {
+                return fmt::format("{}_position[{}].{}", StageInputName(stage),
+                                   Visit(abuf->GetBuffer()), swizzle);
+            } else {
+                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
+            }
+        }
+        case Attribute::Index::TessCoordInstanceIDVertexID:
+            ASSERT(stage == ShaderType::Vertex);
+            switch (element) {
+            case 2:
+                return "vertex.instance";
+            case 3:
+                return "vertex.id";
+            }
+            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+            break;
+        case Attribute::Index::PointCoord:
+            switch (element) {
+            case 0:
+                return "fragment.pointcoord.x";
+            case 1:
+                return "fragment.pointcoord.y";
+            }
+            UNIMPLEMENTED();
+            break;
+        case Attribute::Index::FrontFacing: {
+            ASSERT(stage == ShaderType::Fragment);
+            ASSERT(element == 3);
+            const std::string temporary = AllocVectorTemporary();
+            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
+            AddLine("MOV.U.CC RC.x, -RC;");
+            AddLine("MOV.S {}.x, 0;", temporary);
+            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
+            return fmt::format("{}.x", temporary);
+        }
+        default:
+            if (IsGenericAttribute(index)) {
+                if (stage == ShaderType::Geometry) {
+                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
+                                       Visit(abuf->GetBuffer()), swizzle);
+                } else {
+                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
+                                       GetGenericAttributeIndex(index), swizzle);
+                }
+            }
+            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
+            break;
+        }
+        // Fallback for attributes that are not handled above.
+        return "{0, 0, 0, 0}.x";
+    }
+
+    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
+        std::string offset_string;
+        const auto& offset = cbuf->GetOffset();
+        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
+            // Immediate offsets are written inline instead of going through a temporary.
+            offset_string = std::to_string(imm->GetValue());
+        } else {
+            offset_string = Visit(offset);
+        }
+        std::string temporary = AllocTemporary();
+        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
+        return temporary;
+    }
+
+    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
+        std::string temporary = AllocTemporary();
+        // Visit the two addresses in a fixed order; as arguments of one call their relative
+        // order would be unspecified and the emitted code would depend on the compiler.
+        const std::string real_address = Visit(gmem->GetRealAddress());
+        const std::string base_address = Visit(gmem->GetBaseAddress());
+        AddLine("SUB.U {}, {}, {};", temporary, real_address, base_address);
+        AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
+                temporary);
+        return temporary;
+    }
+
+    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
+        // The visited address may name a guest register (for example "R5.x"). Compute into a
+        // fresh temporary so that the load never overwrites the address operand.
+        const std::string address = Visit(lmem->GetAddress());
+        std::string temporary = AllocTemporary();
+        AddLine("SHR.U {}, {}, 2;", temporary, address);
+        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
+        return temporary;
+    }
+
+    if (const auto smem = std::get_if<SmemNode>(&*node)) {
+        // Same as for local memory: load into a fresh temporary, not into the address operand.
+        const std::string address = Visit(smem->GetAddress());
+        std::string temporary = AllocTemporary();
+        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, address);
+        return temporary;
+    }
+
+    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
+        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
+        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
+    }
+
+    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+        if (const auto amend_index = conditional->GetAmendIndex()) {
+            Visit(ir.GetAmendNode(*amend_index));
+        }
+        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
+        AddLine("IF NE.x;");
+        VisitBlock(conditional->GetCode());
+        AddLine("ENDIF;");
+        return {};
+    }
+
+    if (std::holds_alternative<CommentNode>(*node)) {
+        // Emitting the comment text would generate invalid code. GLASM lacks comments.
+        return {};
+    }
+
+    UNIMPLEMENTED();
+    return {};
+}
+
+// Packs the texture coordinates of `operation` into one vector temporary: first the
+// operation's operands, then the array index converted with I2F.S (array samplers), then the
+// depth compare value (shadow samplers). Returns the temporary's name together with the number
+// of components written.
+std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    UNIMPLEMENTED_IF(meta.sampler.is_indexed);
+    UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
+                     meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
+
+    const std::size_t operand_count = operation.GetOperandsCount();
+    std::string coords = AllocVectorTemporary();
+
+    std::size_t component = 0;
+    while (component < operand_count) {
+        AddLine("MOV.F {}.{}, {};", coords, Swizzle(component), Visit(operation[component]));
+        ++component;
+    }
+    if (meta.sampler.is_array) {
+        AddLine("I2F.S {}.{}, {};", coords, Swizzle(component), Visit(meta.array));
+        ++component;
+    }
+    if (meta.sampler.is_shadow) {
+        AddLine("MOV.F {}.{}, {};", coords, Swizzle(component), Visit(meta.depth_compare));
+        ++component;
+    }
+    return {std::move(coords), component};
+}
+
+// Builds the ", offset(T<n>)" suffix for a texture instruction. The offset nodes in
+// meta.aoffi are copied into consecutive components of a vector temporary. Returns an empty
+// string when the operation carries no offsets.
+std::string ARBDecompiler::BuildAoffi(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    if (meta.aoffi.empty()) {
+        return {};
+    }
+
+    const std::string offsets = AllocVectorTemporary();
+    std::size_t component = 0;
+    for (const auto& offset_node : meta.aoffi) {
+        AddLine("MOV.S {}.{}, {};", offsets, Swizzle(component), Visit(offset_node));
+        ++component;
+    }
+    return fmt::format(", offset({})", offsets);
+}
+
+// Emits the program exit. Stages other than fragment only emit RET. Fragment programs first
+// copy registers into the color outputs: every enabled component of every render target takes
+// the next register in sequence, starting at R0. When depth output is enabled, the register
+// one past the next free one is written to result.depth.z. Registers the IR never uses are
+// replaced by a zero constant.
+void ARBDecompiler::Exit() {
+    if (stage != ShaderType::Fragment) {
+        AddLine("RET;");
+        return;
+    }
+
+    const auto register_or_zero = [this](u32 reg) -> std::string {
+        // TODO(Rodrigo): Replace with contains once C++20 releases
+        const auto& used_registers = ir.GetRegisters();
+        const bool is_used = used_registers.find(reg) != used_registers.end();
+        if (!is_used) {
+            return "{0, 0, 0, 0}.x";
+        }
+        return fmt::format("R{}.x", reg);
+    };
+
+    const auto& header = ir.GetHeader();
+    u32 next_register = 0;
+    for (u32 target = 0; target < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++target) {
+        for (u32 channel = 0; channel < 4; ++channel) {
+            if (header.ps.IsColorComponentOutputEnabled(target, channel)) {
+                AddLine("MOV.F result_color{}.{}, {};", target, Swizzle(channel),
+                        register_or_zero(next_register));
+                ++next_register;
+            }
+        }
+    }
+    if (header.ps.omap.depth) {
+        AddLine("MOV.F result.depth.z, {};", register_or_zero(next_register + 1));
+    }
+
+    AddLine("RET;");
+}
+
+// Emits the assembly for an assignment: operation[0] is the destination node and
+// operation[1] the source node.
+//
+// Destinations handled: general purpose registers, output attributes, local memory,
+// shared memory and global memory. Writes to Register::ZeroIndex, to unsupported outputs
+// and to outputs whose extension is missing emit nothing. Always returns an empty string.
+std::string ARBDecompiler::Assign(Operation operation) {
+    const Node& dest = operation[0];
+    const Node& src = operation[1];
+
+    std::string dest_name;
+    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
+        if (gpr->GetIndex() == Register::ZeroIndex) {
+            // Writing to Register::ZeroIndex is a no op
+            return {};
+        }
+        dest_name = fmt::format("R{}.x", gpr->GetIndex());
+    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
+        const u32 element = abuf->GetElement();
+        const char swizzle = Swizzle(element);
+        switch (const Attribute::Index index = abuf->GetIndex()) {
+        case Attribute::Index::Position:
+            dest_name = fmt::format("result.position.{}", swizzle);
+            break;
+        case Attribute::Index::LayerViewportPointSize:
+            switch (element) {
+            case 0:
+                UNIMPLEMENTED();
+                return {};
+            case 1:
+            case 2:
+                if (!device.HasNvViewportArray2()) {
+                    LOG_ERROR(
+                        Render_OpenGL,
+                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
+                    return {};
+                }
+                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
+                break;
+            case 3:
+                dest_name = "result.pointsize.x";
+                break;
+            default:
+                // Without this an unexpected element would leave dest_name empty and emit
+                // a MOV with no destination
+                UNREACHABLE();
+                return {};
+            }
+            break;
+        case Attribute::Index::ClipDistances0123:
+            dest_name = fmt::format("result.clip[{}].x", element);
+            break;
+        case Attribute::Index::ClipDistances4567:
+            dest_name = fmt::format("result.clip[{}].x", element + 4);
+            break;
+        default:
+            if (!IsGenericAttribute(index)) {
+                UNREACHABLE();
+                return {};
+            }
+            dest_name =
+                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
+            break;
+        }
+    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
+        // Shift the address into a fresh temporary. The string returned by Visit is an
+        // operand that may name a guest register or a constant, so it must not be used
+        // as the destination of the shift.
+        const std::string address = AllocTemporary();
+        AddLine("SHR.U {}, {}, 2;", address, Visit(lmem->GetAddress()));
+        dest_name = fmt::format("lmem[{}].x", address);
+    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
+        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
+        ResetTemporaries();
+        return {};
+    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
+        // Global memory is indexed with the offset from the descriptor's base address
+        const std::string temporary = AllocTemporary();
+        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
+                Visit(gmem->GetBaseAddress()));
+        AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
+                temporary);
+        ResetTemporaries();
+        return {};
+    } else {
+        UNREACHABLE();
+        ResetTemporaries();
+        return {};
+    }
+
+    AddLine("MOV.U {}, {};", dest_name, Visit(src));
+    ResetTemporaries();
+    return {};
+}
+
+// Emits a CMP.S over the three operands and returns the temporary holding its result.
+std::string ARBDecompiler::Select(Operation operation) {
+    std::string selected = AllocTemporary();
+    AddLine("CMP.S {}, {}, {}, {};", selected, Visit(operation[0]), Visit(operation[1]),
+            Visit(operation[2]));
+    return selected;
+}
+
+// Clamps operation[0] between operation[1] (low) and operation[2] (high). When both bounds
+// are immediates equal to 0 and the bit pattern of 1.0f, one saturating MOV is emitted;
+// otherwise a MIN.F followed by a MAX.F is emitted.
+std::string ARBDecompiler::FClamp(Operation operation) {
+    // 1.0f in hex, replace with std::bit_cast on C++20
+    static constexpr u32 POSITIVE_ONE = 0x3f800000;
+
+    std::string result = AllocTemporary();
+    const Node& value = operation[0];
+    const Node& low = operation[1];
+    const Node& high = operation[2];
+
+    const auto* const low_immediate = std::get_if<ImmediateNode>(&*low);
+    const auto* const high_immediate = std::get_if<ImmediateNode>(&*high);
+    const bool is_saturate = low_immediate && high_immediate &&
+                             low_immediate->GetValue() == 0 &&
+                             high_immediate->GetValue() == POSITIVE_ONE;
+    if (!is_saturate) {
+        AddLine("MIN.F {}, {}, {};", result, Visit(value), Visit(high));
+        AddLine("MAX.F {}, {}, {};", result, result, Visit(low));
+        return result;
+    }
+    AddLine("MOV.F32.SAT {}, {};", result, Visit(value));
+    return result;
+}
+
+// Emits UP2H.F of the operand into the x component of a new vector temporary and returns
+// that component.
+std::string ARBDecompiler::FCastHalf0(Operation operation) {
+    const std::string unpacked = AllocVectorTemporary();
+    AddLine("UP2H.F {}.x, {};", unpacked, Visit(operation[0]));
+    return fmt::format("{}.x", unpacked);
+}
+
+// Emits UP2H.F of the operand into the y component of a new vector temporary, copies y to
+// x and returns the x component.
+std::string ARBDecompiler::FCastHalf1(Operation operation) {
+    const std::string unpacked = AllocVectorTemporary();
+    AddLine("UP2H.F {}.y, {};", unpacked, Visit(operation[0]));
+    AddLine("MOV {}.x, {}.y;", unpacked, unpacked);
+    return fmt::format("{}.x", unpacked);
+}
+
+// Emits a square root as RSQ.F32 followed by RCP.F32 on the same temporary.
+std::string ARBDecompiler::FSqrt(Operation operation) {
+    std::string root = AllocTemporary();
+    AddLine("RSQ.F32 {}, {};", root, Visit(operation[0]));
+    AddLine("RCP.F32 {}, {};", root, root);
+    return root;
+}
+
+// Emits a swizzled add of operation[0] and operation[1]. A two-bit selector is extracted
+// from operation[2] using the thread id and indexes the FSWZA/FSWZB tables that scale each
+// operand before the final add. Without warp intrinsics an error is logged and a plain
+// ADD.F of the two operands is emitted instead.
+std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
+    const std::string scratch = AllocVectorTemporary();
+    if (device.HasWarpIntrinsics()) {
+        // scratch.z = (operation[2] >> ((threadid & 3) << 1)) & 3
+        AddLine("AND.U {}.z, {}.threadid, 3;", scratch, StageInputName(stage));
+        AddLine("SHL.U {}.z, {}.z, 1;", scratch, scratch);
+        AddLine("SHR.U {}.z, {}, {}.z;", scratch, Visit(operation[2]), scratch);
+        AddLine("AND.U {}.z, {}.z, 3;", scratch, scratch);
+        AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", scratch, Visit(operation[0]), scratch);
+        AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", scratch, Visit(operation[1]), scratch);
+        AddLine("ADD.F32 {}.x, {}.x, {}.y;", scratch, scratch, scratch);
+        return fmt::format("{}.x", scratch);
+    }
+
+    LOG_ERROR(Render_OpenGL,
+              "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+    AddLine("ADD.F {}.x, {}, {};", scratch, Visit(operation[0]), Visit(operation[1]));
+    return fmt::format("{}.x", scratch);
+}
+
+// Unpacks both operands with UP2H.F, adds them with ADD.F16 and repacks the sum with
+// PK2H.F. Returns the x component holding the packed result.
+std::string ARBDecompiler::HAdd2(Operation operation) {
+    const std::string lhs = AllocVectorTemporary();
+    const std::string rhs = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", lhs, Visit(operation[0]));
+    AddLine("UP2H.F {}.xy, {};", rhs, Visit(operation[1]));
+    AddLine("ADD.F16 {}, {}, {};", lhs, lhs, rhs);
+    AddLine("PK2H.F {}.x, {};", lhs, lhs);
+    return fmt::format("{}.x", lhs);
+}
+
+// Unpacks both operands with UP2H.F, multiplies them with MUL.F16 and repacks the product
+// with PK2H.F. Returns the x component holding the packed result.
+std::string ARBDecompiler::HMul2(Operation operation) {
+    const std::string lhs = AllocVectorTemporary();
+    const std::string rhs = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", lhs, Visit(operation[0]));
+    AddLine("UP2H.F {}.xy, {};", rhs, Visit(operation[1]));
+    AddLine("MUL.F16 {}, {}, {};", lhs, lhs, rhs);
+    AddLine("PK2H.F {}.x, {};", lhs, lhs);
+    return fmt::format("{}.x", lhs);
+}
+
+// Unpacks the three operands with UP2H.F, combines them with MAD.F16 and repacks the
+// result with PK2H.F. Returns the x component holding the packed result.
+std::string ARBDecompiler::HFma2(Operation operation) {
+    const std::string factor_a = AllocVectorTemporary();
+    const std::string factor_b = AllocVectorTemporary();
+    const std::string addend = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", factor_a, Visit(operation[0]));
+    AddLine("UP2H.F {}.xy, {};", factor_b, Visit(operation[1]));
+    AddLine("UP2H.F {}.xy, {};", addend, Visit(operation[2]));
+    AddLine("MAD.F16 {}, {}, {}, {};", factor_a, factor_a, factor_b, addend);
+    AddLine("PK2H.F {}.x, {};", factor_a, factor_a);
+    return fmt::format("{}.x", factor_a);
+}
+
+// Unpacks the operand with UP2H.F and repacks it with PK2H.F using the |x| source modifier.
+std::string ARBDecompiler::HAbsolute(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+    AddLine("PK2H.F {}.x, |{}|;", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Unpacks operation[0] and conditionally negates each half: the x half under the NE.x
+// condition set from operation[1], the y half under the NE.x condition set from
+// operation[2]. The halves are then repacked with PK2H.F.
+std::string ARBDecompiler::HNegate(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+
+    // First half
+    AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
+    AddLine("MOV.F {}.x (NE.x), -{}.x;", halves, halves);
+
+    // Second half
+    AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
+    AddLine("MOV.F {}.y (NE.x), -{}.y;", halves, halves);
+
+    AddLine("PK2H.F {}.x, {};", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Unpacks operation[0], applies MAX.F against operation[1] and MIN.F against operation[2]
+// (each bound copied into both x and y of a second temporary), then repacks with PK2H.F.
+std::string ARBDecompiler::HClamp(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    const std::string bound = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+
+    // Lower bound
+    AddLine("MOV.U {}.x, {};", bound, Visit(operation[1]));
+    AddLine("MOV.U {}.y, {}.x;", bound, bound);
+    AddLine("MAX.F {}, {}, {};", halves, halves, bound);
+
+    // Upper bound
+    AddLine("MOV.U {}.x, {};", bound, Visit(operation[2]));
+    AddLine("MOV.U {}.y, {}.x;", bound, bound);
+    AddLine("MIN.F {}, {}, {};", halves, halves, bound);
+
+    AddLine("PK2H.F {}.x, {};", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Packs the operand with PK2H.F: the operand is placed in x, y is filled from a zero
+// constant, and the packed value is returned in the x component.
+std::string ARBDecompiler::HCastFloat(Operation operation) {
+    const std::string packed = AllocVectorTemporary();
+    AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", packed);
+    AddLine("MOV.F {}.x, {};", packed, Visit(operation[0]));
+    AddLine("PK2H.F {}.x, {};", packed, packed);
+    return fmt::format("{}.x", packed);
+}
+
+// Converts the operand to a packed pair according to the HalfType stored in the
+// operation's meta:
+//  - H0_H1: the operand is returned unchanged.
+//  - F32:   the operand is copied into x and y, then packed.
+//  - H0_H0: the operand is unpacked, x is copied over y, then packed.
+//  - H1_H1: the operand is unpacked, y is copied over x, then packed.
+std::string ARBDecompiler::HUnpack(Operation operation) {
+    const std::string operand = Visit(operation[0]);
+    const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
+    switch (type) {
+    case Tegra::Shader::HalfType::H0_H1:
+        return operand;
+    case Tegra::Shader::HalfType::F32:
+    case Tegra::Shader::HalfType::H0_H0:
+    case Tegra::Shader::HalfType::H1_H1:
+        break;
+    default:
+        UNREACHABLE();
+        return "{0, 0, 0, 0}.x";
+    }
+
+    // The remaining types share the same shape: fill x and y, then pack
+    const std::string halves = AllocVectorTemporary();
+    if (type == Tegra::Shader::HalfType::F32) {
+        AddLine("MOV.U {}.x, {};", halves, operand);
+        AddLine("MOV.U {}.y, {}.x;", halves, halves);
+    } else if (type == Tegra::Shader::HalfType::H0_H0) {
+        AddLine("UP2H.F {}.xy, {};", halves, operand);
+        AddLine("MOV.U {}.y, {}.x;", halves, halves);
+    } else {
+        AddLine("UP2H.F {}.xy, {};", halves, operand);
+        AddLine("MOV.U {}.x, {}.y;", halves, halves);
+    }
+    AddLine("PK2H.F {}.x, {};", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Unpacks the operand with UP2H.F into x/y of a new vector temporary and returns x.
+std::string ARBDecompiler::HMergeF32(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+    return fmt::format("{}.x", halves);
+}
+
+// Unpacks operation[0] into xy and operation[1] into zw, overwrites x with z and repacks
+// x/y with PK2H.F.
+std::string ARBDecompiler::HMergeH0(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+    AddLine("UP2H.F {}.zw, {};", halves, Visit(operation[1]));
+    AddLine("MOV.U {}.x, {}.z;", halves, halves);
+    AddLine("PK2H.F {}.x, {};", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Unpacks operation[0] into xy and operation[1] into zw, overwrites y with w and repacks
+// x/y with PK2H.F.
+std::string ARBDecompiler::HMergeH1(Operation operation) {
+    const std::string halves = AllocVectorTemporary();
+    AddLine("UP2H.F {}.xy, {};", halves, Visit(operation[0]));
+    AddLine("UP2H.F {}.zw, {};", halves, Visit(operation[1]));
+    AddLine("MOV.U {}.y, {}.w;", halves, halves);
+    AddLine("PK2H.F {}.x, {};", halves, halves);
+    return fmt::format("{}.x", halves);
+}
+
+// Moves operation[0] into x and operation[1] into y of a vector temporary and packs them
+// with PK2H.F.
+std::string ARBDecompiler::HPack2(Operation operation) {
+    const std::string pair = AllocVectorTemporary();
+    AddLine("MOV.U {}.x, {};", pair, Visit(operation[0]));
+    AddLine("MOV.U {}.y, {};", pair, Visit(operation[1]));
+    AddLine("PK2H.F {}.x, {};", pair, pair);
+    return fmt::format("{}.x", pair);
+}
+
+// Emits the assignment of operation[1] to a predicate or internal flag (operation[0]).
+// Writes to the NeverExecute and UnusedIndex predicates emit nothing. Always returns an
+// empty string.
+std::string ARBDecompiler::LogicalAssign(Operation operation) {
+    const Node& dest = operation[0];
+    const Node& src = operation[1];
+
+    std::string target;
+
+    if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
+        ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
+
+        const Tegra::Shader::Pred index = pred->GetIndex();
+        if (index == Tegra::Shader::Pred::NeverExecute ||
+            index == Tegra::Shader::Pred::UnusedIndex) {
+            // Writing to these predicates is a no-op
+            return {};
+        }
+        target = fmt::format("P{}.x", static_cast<u64>(index));
+    } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
+        const auto flag_index = static_cast<std::size_t>(internal_flag->GetFlag());
+        target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[flag_index]);
+    } else {
+        UNREACHABLE();
+        ResetTemporaries();
+        return {};
+    }
+
+    AddLine("MOV.U {}, {};", target, Visit(src));
+    ResetTemporaries();
+    return {};
+}
+
+// Copies one component of operation[0] into a new temporary. The component is selected by
+// the immediate stored in operation[1].
+std::string ARBDecompiler::LogicalPick2(Operation operation) {
+    std::string picked = AllocTemporary();
+    const auto& selector = std::get<ImmediateNode>(*operation[1]);
+    const u32 component = selector.GetValue();
+    AddLine("MOV.U {}, {}.{};", picked, Visit(operation[0]), Swizzle(component));
+    return picked;
+}
+
+// Emits an AND.U of the x and y components of the operand into a new temporary.
+std::string ARBDecompiler::LogicalAnd2(Operation operation) {
+    std::string combined = AllocTemporary();
+    const std::string pair = Visit(operation[0]);
+    AddLine("AND.U {}, {}.x, {}.y;", combined, pair, pair);
+    return combined;
+}
+
+// Loads both operands into the condition register and produces -1 in a new temporary,
+// overwritten with 0 under the NAN.x or NAN.y condition.
+std::string ARBDecompiler::FloatOrdered(Operation operation) {
+    std::string ordered = AllocTemporary();
+    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+    // Start as "true" and clear when either condition reports NAN
+    AddLine("MOV.S {}, -1;", ordered);
+    AddLine("MOV.S {} (NAN.x), 0;", ordered);
+    AddLine("MOV.S {} (NAN.y), 0;", ordered);
+    return ordered;
+}
+
+// Loads both operands into the condition register and produces 0 in a new temporary,
+// overwritten with -1 under the NAN.x or NAN.y condition.
+std::string ARBDecompiler::FloatUnordered(Operation operation) {
+    std::string unordered = AllocTemporary();
+    AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
+    AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
+    // Start as "false" and set when either condition reports NAN
+    AddLine("MOV.S {}, 0;", unordered);
+    AddLine("MOV.S {} (NAN.x), -1;", unordered);
+    AddLine("MOV.S {} (NAN.y), -1;", unordered);
+    return unordered;
+}
+
+// Adds both operands with ADDC.U and returns a temporary set to -1 inside an
+// "IF CF.x" block, 0 otherwise.
+std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
+    std::string carry = AllocTemporary();
+    AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
+    AddLine("MOV.S {}, 0;", carry);
+    AddLine("IF CF.x;");
+    AddLine("MOV.S {}, -1;", carry);
+    AddLine("ENDIF;");
+    return carry;
+}
+
+// Emits a texture sample. The opcode is TEX by default, TXB when a bias is present and TXL
+// when an explicit lod is present. The bias/lod value is placed in the w component of the
+// coordinate vector when fewer than four components are in use; otherwise it is passed as
+// an extra scalar operand. Returns the component selected by meta.element.
+std::string ARBDecompiler::Texture(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+    const auto built = BuildCoords(operation);
+    const std::string& coords = built.first;
+    const std::size_t used_components = built.second;
+
+    std::string extra;
+    // Shared by the bias and lod paths: only the opcode differs between them
+    const auto emit_level = [&](const Node& level) {
+        if (used_components < 4) {
+            AddLine("MOV.F {}.w, {};", coords, Visit(level));
+            return;
+        }
+        const std::string scalar = AllocTemporary();
+        AddLine("MOV.F {}, {};", scalar, Visit(level));
+        extra = fmt::format(" {},", scalar);
+    };
+
+    std::string_view opcode = "TEX";
+    if (meta.bias) {
+        ASSERT(!meta.lod);
+        opcode = "TXB";
+        emit_level(meta.bias);
+    }
+    if (meta.lod) {
+        ASSERT(!meta.bias);
+        opcode = "TXL";
+        emit_level(meta.lod);
+    }
+
+    AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, coords, coords, extra, sampler_id,
+            TextureType(meta), BuildAoffi(operation));
+    AddLine("MOV.U {}.x, {}.{};", coords, coords, Swizzle(meta.element));
+    return fmt::format("{}.x", coords);
+}
+
+// Emits a TXG gather. For non-shadow samplers the gathered component comes from the
+// immediate in meta.component and is appended to the texture operand. Returns the
+// component selected by meta.element.
+std::string ARBDecompiler::TextureGather(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+    const auto [coords, used_components] = BuildCoords(operation);
+
+    std::string component_suffix;
+    if (!meta.sampler.is_shadow) {
+        const u32 component = std::get<ImmediateNode>(*meta.component).GetValue();
+        component_suffix = fmt::format(".{}", Swizzle(component));
+    }
+
+    AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", coords, coords, sampler_id, component_suffix,
+            TextureType(meta), BuildAoffi(operation));
+    AddLine("MOV.U {}.x, {}.{};", coords, coords, Swizzle(meta.element));
+    return fmt::format("{}.x", coords);
+}
+
+// Emits a TXQ query. The lod operand is operation[0] when present and the literal "0"
+// otherwise. Array samplers are asserted against. Returns the component selected by
+// meta.element.
+std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const std::string query = AllocVectorTemporary();
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+    ASSERT(!meta.sampler.is_array);
+
+    std::string lod = "0";
+    if (operation.GetOperandsCount() > 0) {
+        lod = Visit(operation[0]);
+    }
+    AddLine("TXQ {}, {}, texture[{}], {};", query, lod, sampler_id, TextureType(meta));
+    AddLine("MOV.U {}.x, {}.{};", query, query, Swizzle(meta.element));
+    return fmt::format("{}.x", query);
+}
+
+// Emits a LOD.F query on the operand coordinates, multiplies the result by
+// {256, 256, 0, 0} and truncates it with TRUNC.S. Array samplers are asserted against.
+// Returns the component selected by meta.element.
+std::string ARBDecompiler::TextureQueryLod(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const std::string query = AllocVectorTemporary();
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+
+    ASSERT(!meta.sampler.is_array);
+
+    const std::size_t num_operands = operation.GetOperandsCount();
+    std::size_t component = 0;
+    while (component < num_operands) {
+        AddLine("MOV.F {}.{}, {};", query, Swizzle(component), Visit(operation[component]));
+        ++component;
+    }
+    AddLine("LOD.F {}, {}, texture[{}], {};", query, query, sampler_id, TextureType(meta));
+    AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", query, query);
+    AddLine("TRUNC.S {}, {};", query, query);
+    AddLine("MOV.U {}.x, {}.{};", query, query, Swizzle(meta.element));
+    return fmt::format("{}.x", query);
+}
+
+// Emits a TXF fetch. For non-buffer samplers the lod is written to the w component of the
+// coordinate vector, which asserts that fewer than four components are already in use.
+// Returns the component selected by meta.element.
+std::string ARBDecompiler::TexelFetch(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+    const auto [coords, used_components] = BuildCoords(operation);
+
+    const bool needs_lod = !meta.sampler.is_buffer;
+    if (needs_lod) {
+        ASSERT(used_components < 4);
+        AddLine("MOV.F {}.w, {};", coords, Visit(meta.lod));
+    }
+    AddLine("TXF.F {}, {}, texture[{}], {}{};", coords, coords, sampler_id, TextureType(meta),
+            BuildAoffi(operation));
+    AddLine("MOV.U {}.x, {}.{};", coords, coords, Swizzle(meta.element));
+    return fmt::format("{}.x", coords);
+}
+
+// Emits a TXD sample with explicit derivatives. meta.derivates is read as interleaved
+// pairs: even entries go to the ddx vector and odd entries to the ddy vector. The sample
+// result overwrites the coordinate temporary. Returns the component selected by
+// meta.element.
+std::string ARBDecompiler::TextureGradient(Operation operation) {
+    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
+    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
+    const std::string ddx = AllocVectorTemporary();
+    const std::string ddy = AllocVectorTemporary();
+    const std::string coord = BuildCoords(operation).first;
+
+    const std::size_t num_pairs = meta.derivates.size() / 2;
+    for (std::size_t pair = 0; pair < num_pairs; ++pair) {
+        const char component = Swizzle(pair);
+        const std::size_t base = pair * 2;
+        AddLine("MOV.F {}.{}, {};", ddx, component, Visit(meta.derivates[base]));
+        AddLine("MOV.F {}.{}, {};", ddy, component, Visit(meta.derivates[base + 1]));
+    }
+
+    // The coordinate temporary doubles as the destination of the sample
+    AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", coord, coord, ddx, ddy, sampler_id,
+            TextureType(meta), BuildAoffi(operation));
+    AddLine("MOV.F {}.x, {}.{};", coord, coord, Swizzle(meta.element));
+    return fmt::format("{}.x", coord);
+}
+
+// Emits a LOADIM.F image load. The operands are moved into a vector temporary as
+// coordinates and the loaded texel overwrites that temporary. Returns the component
+// selected by meta.element.
+std::string ARBDecompiler::ImageLoad(Operation operation) {
+    const auto& meta = std::get<MetaImage>(operation.GetMeta());
+    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+    const std::size_t num_coords = operation.GetOperandsCount();
+    const std::string_view type = ImageType(meta.image.type);
+
+    const std::string texel = AllocVectorTemporary();
+    std::size_t component = 0;
+    while (component < num_coords) {
+        AddLine("MOV.S {}.{}, {};", texel, Swizzle(component), Visit(operation[component]));
+        ++component;
+    }
+    AddLine("LOADIM.F {}, {}, image[{}], {};", texel, texel, image_id, type);
+    AddLine("MOV.F {}.x, {}.{};", texel, texel, Swizzle(meta.element));
+    return fmt::format("{}.x", texel);
+}
+
+// Emits a STOREIM.F image store. The operands are gathered as coordinates and meta.values
+// as the stored value, each in its own vector temporary. Always returns an empty string.
+std::string ARBDecompiler::ImageStore(Operation operation) {
+    const auto& meta = std::get<MetaImage>(operation.GetMeta());
+    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
+    const std::size_t num_coords = operation.GetOperandsCount();
+    const std::size_t num_values = meta.values.size();
+    const std::string_view type = ImageType(meta.image.type);
+
+    const std::string coord = AllocVectorTemporary();
+    const std::string value = AllocVectorTemporary();
+
+    std::size_t component = 0;
+    while (component < num_coords) {
+        AddLine("MOV.S {}.{}, {};", coord, Swizzle(component), Visit(operation[component]));
+        ++component;
+    }
+    component = 0;
+    while (component < num_values) {
+        AddLine("MOV.F {}.{}, {};", value, Swizzle(component), Visit(meta.values[component]));
+        ++component;
+    }
+    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
+    return {};
+}
+
+// Emits a direct branch: PC.x is set to the immediate target in operation[0] and CONT
+// is emitted.
+std::string ARBDecompiler::Branch(Operation operation) {
+    const auto target = std::get<ImmediateNode>(*operation[0]);
+    const auto address = target.GetValue();
+    AddLine("MOV.U PC.x, {};", address);
+    AddLine("CONT;");
+    return std::string{};
+}
+
+// Emits an indirect branch: PC.x is set to the visited operand and CONT is emitted.
+std::string ARBDecompiler::BranchIndirect(Operation operation) {
+    const Node& target = operation[0];
+    AddLine("MOV.U PC.x, {};", Visit(target));
+    AddLine("CONT;");
+    return std::string{};
+}
+
+// Pushes the immediate target in operation[0] onto the flow stack selected by the
+// operation's meta: the value is stored at the current top and the top index is
+// incremented by one.
+std::string ARBDecompiler::PushFlowStack(Operation operation) {
+    const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
+    const u32 return_target = std::get<ImmediateNode>(*operation[0]).GetValue();
+    const std::string_view name = StackName(stack_class);
+    AddLine("MOV.U {}[{}_TOP.x].x, {};", name, name, return_target);
+    AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", name, name);
+    return std::string{};
+}
+
+// Pops the flow stack selected by the operation's meta: the top index is decremented,
+// PC.x is loaded from the new top and CONT is emitted.
+std::string ARBDecompiler::PopFlowStack(Operation operation) {
+    const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
+    const std::string_view name = StackName(stack_class);
+    AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", name, name);
+    AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", name, name);
+    AddLine("CONT;");
+    return std::string{};
+}
+
+// Operation wrapper around the parameterless Exit(); the operation itself is ignored.
+std::string ARBDecompiler::Exit(Operation) {
+    Exit();
+    return std::string{};
+}
+
+// Emits an unconditional "KIL TR;". Produces no value.
+std::string ARBDecompiler::Discard(Operation) {
+    AddLine("KIL TR;");
+    return std::string{};
+}
+
+// Emits "EMIT;". Produces no value.
+std::string ARBDecompiler::EmitVertex(Operation) {
+    AddLine("EMIT;");
+    return std::string{};
+}
+
+// Emits "ENDPRIM;". Produces no value.
+std::string ARBDecompiler::EndPrimitive(Operation) {
+    AddLine("ENDPRIM;");
+    return std::string{};
+}
+
+// Returns the operand naming the invocation id; no instruction is emitted.
+std::string ARBDecompiler::InvocationId(Operation) {
+    return std::string{"primitive.invocation"};
+}
+
+// Stubbed: logs a warning and returns a temporary that is always loaded with 1.
+std::string ARBDecompiler::YNegate(Operation) {
+    LOG_WARNING(Render_OpenGL, "(STUBBED)");
+    const std::string constant_one = AllocTemporary();
+    AddLine("MOV.F {}, 1;", constant_one);
+    return constant_one;
+}
+
+// Returns the thread id operand of the current stage's input; no instruction is emitted.
+std::string ARBDecompiler::ThreadId(Operation) {
+    const auto input_name = StageInputName(stage);
+    return fmt::format("{}.threadid", input_name);
+}
+
+// Emits a SHFIDX.U shuffle of operation[0] indexed by operation[1] and returns the y
+// component of its result (copied into x). Without warp intrinsics an error is logged and
+// operation[0] is returned unshuffled.
+std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
+    if (device.HasWarpIntrinsics()) {
+        const std::string shuffled = AllocVectorTemporary();
+        AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", shuffled, Visit(operation[0]),
+                Visit(operation[1]));
+        AddLine("MOV.U {}.x, {}.y;", shuffled, shuffled);
+        return fmt::format("{}.x", shuffled);
+    }
+
+    LOG_ERROR(Render_OpenGL,
+              "NV_shader_thread_shuffle is missing. Kepler or better is required.");
+    return Visit(operation[0]);
+}
+
+std::string ARBDecompiler::Barrier(Operation) {
+ if (!ir.IsDecompiled()) {
+ LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
+ return {};
+ }
+ AddLine("BAR;");
+ return {};
+}
+
+// Emits "MEMBAR.CTA;". Produces no value.
+std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
+    AddLine("MEMBAR.CTA;");
+    return std::string{};
+}
+
+// Emits "MEMBAR;". Produces no value.
+std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
+    AddLine("MEMBAR;");
+    return std::string{};
+}
+
+} // Anonymous namespace
+
+// Public entry point of the decompiler: constructs an ARBDecompiler from the given device,
+// shader IR, registry, stage and identifier, and returns the code it produced.
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+                                    const VideoCommon::Shader::Registry& registry,
+                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
+    ARBDecompiler decompiler(device, ir, registry, stage, identifier);
+    return decompiler.Code();
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+enum class ShaderType : u32;
+}
+
+namespace VideoCommon::Shader {
+class ShaderIR;
+class Registry;
+} // namespace VideoCommon::Shader
+
+namespace OpenGL {
+
+class Device;
+
+std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier);
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d2cab50bd..d9f7b4cc6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -21,22 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
-CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
+Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
: VideoCommon::BufferBlock{cpu_addr, size} {
gl_buffer.Create();
glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
+ glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
+ }
}
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
+
+// Copies `size` bytes from `data` into this buffer starting at byte `offset`.
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
+    const auto gl_offset = static_cast<GLintptr>(offset);
+    const auto gl_size = static_cast<GLsizeiptr>(size);
+    glNamedBufferSubData(Handle(), gl_offset, gl_size, data);
+}
+
+// Reads `size` bytes starting at byte `offset` of this buffer into `data`. A buffer update
+// memory barrier is issued before the read.
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
+    MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+
+    const auto gl_offset = static_cast<GLintptr>(offset);
+    const auto gl_size = static_cast<GLsizeiptr>(size);
+    glGetNamedBufferSubData(Handle(), gl_offset, gl_size, data);
+}
+
+// Copies `size` bytes from `src` (starting at `src_offset`) into this buffer (starting at
+// `dst_offset`).
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+                      std::size_t size) const {
+    const auto read_offset = static_cast<GLintptr>(src_offset);
+    const auto write_offset = static_cast<GLintptr>(dst_offset);
+    const auto copy_size = static_cast<GLsizeiptr>(size);
+    glCopyNamedBufferSubData(src.Handle(), Handle(), read_offset, write_offset, copy_size);
+}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
- const Device& device, std::size_t stream_size)
- : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+ const Device& device_, std::size_t stream_size)
+ : GenericBufferCache{rasterizer, system,
+ std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
+ device{device_} {
if (!device.HasFastBufferSubData()) {
return;
}
- static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+ static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
for (const GLuint cbuf : cbufs) {
glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -47,44 +72,21 @@ OGLBufferCache::~OGLBufferCache() {
glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
}
-Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<CachedBufferBlock>(cpu_addr, size);
-}
-
-GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
- return buffer->GetHandle();
-}
-
-GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
- return 0;
+std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+ return std::make_shared<Buffer>(device, cpu_addr, size);
}
-void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
- glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(size), data);
-}
-
-void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) {
- MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
- glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
- glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
- static_cast<GLsizeiptr>(size), data);
-}
-
-void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) {
- glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
+ return {0, 0, 0};
}
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
std::size_t size) {
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
- const GLuint& cbuf = cbufs[cbuf_cursor++];
+ const GLuint cbuf = cbufs[cbuf_cursor++];
+
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
- return {cbuf, 0};
+ return {cbuf, 0, 0};
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..59d95adbc 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -24,57 +23,57 @@ class Device;
class OGLStreamBuffer;
class RasterizerOpenGL;
-class CachedBufferBlock;
+class Buffer : public VideoCommon::BufferBlock {
+public:
+ explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
+ ~Buffer();
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
+ void Upload(std::size_t offset, std::size_t size, const u8* data) const;
-class CachedBufferBlock : public VideoCommon::BufferBlock {
-public:
- explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
- ~CachedBufferBlock();
+ void Download(std::size_t offset, std::size_t size, u8* data) const;
+
+ void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) const;
- GLuint GetHandle() const {
+ GLuint Handle() const noexcept {
return gl_buffer.handle;
}
+ u64 Address() const noexcept {
+ return gpu_address;
+ }
+
private:
OGLBuffer gl_buffer;
+ u64 gpu_address = 0;
};
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device, std::size_t stream_size);
~OGLBufferCache();
- GLuint GetEmptyBuffer(std::size_t) override;
+ BufferInfo GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept {
cbuf_cursor = 0;
}
protected:
- Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
- GLuint ToHandle(const Buffer& buffer) override;
-
- void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
-
- void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
-
- void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
private:
+ static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+
+ const Device& device;
+
std::size_t cbuf_cursor = 0;
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
- Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
- cbufs;
+ std::array<GLuint, NUM_CBUFS> cbufs{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d83dca25a..208fc6167 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <cstring>
+#include <limits>
#include <optional>
#include <vector>
@@ -13,6 +14,7 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
+#include "core/settings.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -25,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
-constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
- GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
- GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
+constexpr std::array LimitUBOs = {
+ GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
+ GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
+ GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; // sixth entry (index 5) is the compute limit
constexpr std::array LimitSSBOs = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
+ GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
+ GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; // sixth entry (index 5) is the compute limit
-constexpr std::array LimitSamplers = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS};
+constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+ GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+ GL_MAX_TEXTURE_IMAGE_UNITS,
+ GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; // sixth entry (index 5) is the compute limit
-constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
- GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
- GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
+constexpr std::array LimitImages = {
+ GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
+ GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
+ GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; // sixth entry (index 5) is the compute limit
template <typename T>
T GetInteger(GLenum pname) {
@@ -84,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
return std::exchange(base, base + amount);
}
+/// Reads every limit listed in LimitUBOs through GetInteger<u32>.
+/// @return Array whose element i holds the value reported for LimitUBOs[i].
+std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
+    // std::transform writes LimitUBOs.size() elements. A size mismatch would either overrun
+    // `max` or leave trailing entries unset, so reject it at compile time.
+    static_assert(LimitUBOs.size() == Tegra::Engines::MaxShaderTypes,
+                  "LimitUBOs must provide exactly one limit per shader type");
+    std::array<u32, Tegra::Engines::MaxShaderTypes> max{};
+    std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
+                   [](GLenum pname) { return GetInteger<u32>(pname); });
+    return max;
+}
+
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
@@ -111,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
- // Reserve more image bindings on fragment and vertex stages.
+ // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
+ // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
+ // fragment stage, and at least 1 for the rest of the stages.
+ // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
+
+ // Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
- Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
- bindings[0].image =
- Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
+ Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+
+ // This is guaranteed to be at least 1.
+ const u32 total_extracted_images = num_images / (NumStages - 1);
// Reserve the other image bindings.
- const u32 total_extracted_images = num_images / (NumStages - 2);
- for (std::size_t i = 2; i < NumStages; ++i) {
+ for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
+ if (stage == 4) {
+ continue;
+ }
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
}
@@ -132,6 +152,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
}
bool IsASTCSupported() {
+ static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
@@ -148,25 +169,44 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
- return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
- GLint supported;
- glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
- &supported);
- return supported == GL_TRUE;
- }) == formats.end();
+ static constexpr std::array required_support = {
+ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
+ GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
+ };
+
+ for (const GLenum target : targets) {
+ for (const GLenum format : formats) {
+ for (const GLenum support : required_support) {
+ GLint value;
+ glGetInternalformativ(target, format, support, 1, &value);
+ if (value != GL_FULL_SUPPORT) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
}
} // Anonymous namespace
-Device::Device() : base_bindings{BuildBaseBindings()} {
+Device::Device()
+ : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
+ const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
+ const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_intel = vendor == "Intel";
- const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
+
+ bool disable_fast_buffer_sub_data = false;
+ if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
+ LOG_WARNING(
+ Render_OpenGL,
+ "Beta driver 443.24 is known to have issues. There might be performance issues.");
+ disable_fast_buffer_sub_data = true;
+ }
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -177,30 +217,43 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
+ has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
has_astc = IsASTCSupported();
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
- has_broken_compute = is_intel_proprietary;
- has_fast_buffer_sub_data = is_nvidia;
+ has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+
+ // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
+ // uniform buffers as "push constants"
+ has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
+
+ use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
+ GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
+ GLAD_GL_NV_transform_feedback2;
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+
+ if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
+ LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
+ }
}
Device::Device(std::nullptr_t) {
- uniform_buffer_alignment = 0;
+ max_uniform_buffers.fill(std::numeric_limits<u32>::max()); // every stage reports the largest u32 as its limit
+ uniform_buffer_alignment = 4; // fixed value: nothing in this constructor queries GL
+ shader_storage_alignment = 4; // fixed value, same as the uniform buffer alignment above
max_vertex_attributes = 16;
max_varyings = 15;
has_warp_intrinsics = true;
has_shader_ballot = true;
has_vertex_viewport_layer = true;
has_image_load_formatted = true;
+ has_texture_shadow_lod = true; // reported as available, like the other has_* features set here
has_variable_aoffi = true;
- has_component_indexing_bug = false;
- has_broken_compute = false;
- has_precise_bug = false;
}
bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index a55050cb5..e1d811966 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -24,6 +24,10 @@ public:
explicit Device();
explicit Device(std::nullptr_t);
+    /// Returns the uniform buffer limit stored for the given shader type.
+    u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
+        // The enum's underlying value is used directly as the array index.
+        const auto stage_index = static_cast<std::size_t>(shader_type);
+        return max_uniform_buffers[stage_index];
+    }
+
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
@@ -64,6 +68,14 @@ public:
return has_image_load_formatted;
}
+ bool HasTextureShadowLod() const {
+ return has_texture_shadow_lod; // GL_EXT_texture_shadow_lod availability as recorded by the constructors
+ }
+
+ bool HasVertexBufferUnifiedMemory() const {
+ return has_vertex_buffer_unified_memory; // mirrors GLAD_GL_NV_vertex_buffer_unified_memory; stays false for Device(std::nullptr_t)
+ }
+
bool HasASTC() const {
return has_astc;
}
@@ -80,19 +92,24 @@ public:
return has_precise_bug;
}
- bool HasBrokenCompute() const {
- return has_broken_compute;
- }
-
bool HasFastBufferSubData() const {
return has_fast_buffer_sub_data;
}
+ bool HasNvViewportArray2() const {
+ return has_nv_viewport_array2; // mirrors GLAD_GL_NV_viewport_array2; stays false for Device(std::nullptr_t)
+ }
+
+ bool UseAssemblyShaders() const {
+ return use_assembly_shaders; // true only when the setting is enabled and the required NV extensions are present
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
+ std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
+ std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
@@ -101,12 +118,15 @@ private:
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
bool has_image_load_formatted{};
+ bool has_texture_shadow_lod{};
+ bool has_vertex_buffer_unified_memory{};
bool has_astc{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
bool has_precise_bug{};
- bool has_broken_compute{};
bool has_fast_buffer_sub_data{};
+ bool has_nv_viewport_array2{};
+ bool use_assembly_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 99ddcb3f8..ec5421afa 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8b3b3ce92..e960a0ef1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/shader_cache.h"
namespace OpenGL {
@@ -54,15 +55,34 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
-constexpr std::size_t NumSupportedVertexAttributes = 16;
+constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
+constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
+ NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
+constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
+ NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
+
+constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
+constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) {
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
- const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return engine.GetTextureInfo(tex_handle);
+ const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+ return engine.GetTextureInfo(handle);
}
+
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -87,6 +107,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+    // Four consecutive hardware locations share one attribute slot.
+    const u8 slot = location / 4;
+    const auto in_range = [slot](int first, int last) { return slot >= first && slot <= last; };
+
+    if (slot == 7) {
+        return {GL_POSITION, 0};
+    }
+    if (in_range(8, 39)) {
+        // Generic attributes are numbered relative to slot 8.
+        return {GL_GENERIC_ATTRIB_NV, slot - 8};
+    }
+    if (in_range(40, 43)) {
+        // Front/back primary/secondary colors occupy four adjacent slots.
+        static constexpr std::array<GLint, 4> COLOR_TOKENS = {
+            GL_PRIMARY_COLOR_NV, GL_SECONDARY_COLOR_NV, GL_BACK_PRIMARY_COLOR_NV,
+            GL_BACK_SECONDARY_COLOR_NV};
+        return {COLOR_TOKENS[slot - 40], 0};
+    }
+    if (in_range(48, 55)) {
+        // Texture coordinates are numbered relative to slot 48.
+        return {GL_TEXTURE_COORD_NV, slot - 48};
+    }
+
+    // Any other slot is reported and falls back to the position token.
+    UNIMPLEMENTED_MSG("index={}", static_cast<int>(slot));
+    return {GL_POSITION, 0};
+}
+
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
@@ -94,17 +142,33 @@ void oglEnable(GLenum cap, bool state) {
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info, GLShader::ProgramManager& program_manager,
- StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
+ const Device& device, ScreenInfo& info,
+ ProgramManager& program_manager, StateTracker& state_tracker)
+ : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
+ state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
CheckExtensions();
+
+ unified_uniform_buffer.Create();
+ glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); // no initial data, flags = 0
+
+ if (device.UseAssemblyShaders()) { // staging_cbufs are only created on the assembly shader path
+ glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
+ for (const GLuint cbuf : staging_cbufs) {
+ glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
+ nullptr, 0); // each staging buffer is sized for one full const buffer
+ }
+ }
}
-RasterizerOpenGL::~RasterizerOpenGL() {}
+RasterizerOpenGL::~RasterizerOpenGL() {
+    // The staging buffers are only deleted on the assembly shader path.
+    if (!device.UseAssemblyShaders()) {
+        return;
+    }
+    const auto num_buffers = static_cast<GLsizei>(staging_cbufs.size());
+    glDeleteBuffers(num_buffers, staging_cbufs.data());
+}
void RasterizerOpenGL::CheckExtensions() {
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@@ -130,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
// avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all.
- for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexFormat0 + index]) {
continue;
}
@@ -149,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() {
if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
- MaxwellToGL::VertexType(attrib), attrib.offset);
+ MaxwellToGL::VertexFormat(attrib), attrib.offset);
} else {
- glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ glVertexAttribFormat(gl_index, attrib.ComponentCount(),
+ MaxwellToGL::VertexFormat(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
}
glVertexAttribBinding(gl_index, attrib.buffer);
@@ -168,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
MICROPROFILE_SCOPE(OpenGL_VB);
+ const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
+
// Upload all guest vertex arrays sequentially to our buffer
const auto& regs = gpu.regs;
- for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
}
@@ -183,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
-
ASSERT(end >= start);
+
+ const GLuint gl_index = static_cast<GLuint>(index);
const u64 size = end - start;
if (size == 0) {
- glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride);
+ glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
+ if (use_unified_memory) {
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
+ }
continue;
}
- const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
- glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
- vertex_array.stride);
+ const auto info = buffer_cache.UploadMemory(start, size);
+ if (use_unified_memory) {
+ glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
+ info.address + info.offset, size);
+ } else {
+ glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
+ }
}
}
@@ -205,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
flags[Dirty::VertexInstances] = false;
const auto& regs = gpu.regs;
- for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
}
@@ -222,14 +298,15 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
- const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
- return offset;
+ const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
+ return info.offset;
}
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
+ std::size_t num_ssbos = 0;
u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -259,7 +336,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue;
}
- Shader shader{shader_cache.GetStageProgram(program)};
+ Shader* const shader = shader_cache.GetStageProgram(program);
+
+ if (device.UseAssemblyShaders()) {
+ // Check for ARB limitation. We only have 16 SSBOs per context state. To work around this,
+ // all stages share the same bindings.
+ const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
+ ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
+ num_ssbos += num_stage_ssbos;
+ }
// Stage indices are 0 - 5
const std::size_t stage = index == 0 ? 0 : index - 1;
@@ -526,6 +611,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncFramebufferSRGB();
buffer_cache.Acquire();
+ current_cbuf = 0;
std::size_t buffer_size = CalculateVertexArraysSize();
@@ -535,16 +621,25 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Uniform space for the 5 shader stages
- buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
- (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
- Maxwell::MaxShaderStage;
+ buffer_size =
+ Common::AlignUp<std::size_t>(buffer_size, 4) +
+ (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers *
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
// Prepare the vertex array.
- buffer_cache.Map(buffer_size);
+ const bool invalidated = buffer_cache.Map(buffer_size);
+
+ if (invalidated) {
+ // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
+ auto& dirty = gpu.dirty.flags;
+ dirty[Dirty::VertexBuffers] = true;
+ for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+ dirty[index] = true;
+ }
+ }
// Prepare vertex array format.
SetupVertexFormat();
@@ -558,12 +653,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
// Setup emulation uniform buffer.
- GLShader::MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
- const auto [buffer, offset] =
- buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
- glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
- static_cast<GLsizeiptr>(sizeof(ubo)));
+ if (!device.UseAssemblyShaders()) {
+ MaxwellUniformData ubo;
+ ubo.SetFromRegs(gpu);
+ const auto info =
+ buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
+ glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
+ }
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
@@ -630,16 +727,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- if (device.HasBrokenCompute()) {
- return;
- }
-
buffer_cache.Acquire();
+ current_cbuf = 0;
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
- program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -652,6 +745,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -701,15 +795,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
return;
}
texture_cache.OnCPUWrite(addr, size);
- shader_cache.InvalidateRegion(addr, size);
+ shader_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
- query_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::SyncGuestHost() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
+ shader_cache.SyncGuestHost();
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -811,40 +905,72 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
+ static constexpr std::array PARAMETER_LUT = {
+ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
+ GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; // looked up with stage_index below
+
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[stage_index];
+ const auto& entries = shader->GetEntries();
+ const bool use_unified = entries.use_unified_uniforms;
+ const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; // start of this stage's slice of unified_uniform_buffer
- u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
- for (const auto& entry : shader->GetEntries().const_buffers) {
- const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
- SetupConstBuffer(binding++, buffer, entry);
+ const auto base_bindings = device.GetBaseBindings(stage_index);
+ u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; // the assembly path counts bindings from 0
+ for (const auto& entry : entries.const_buffers) {
+ const u32 index = entry.GetIndex();
+ const auto& buffer = shader_stage.const_buffers[index];
+ SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
+ base_unified_offset + index * Maxwell::MaxConstBufferSize); // per-buffer offset inside the stage's slice
+ ++binding;
+ }
+ if (use_unified) { // bind the stage's slice as one SSBO placed after its global memory bindings
+ const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
+ entries.global_memory_entries.size());
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
+ base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
-void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& entries = kernel->GetEntries();
+ const bool use_unified = entries.use_unified_uniforms;
u32 binding = 0;
- for (const auto& entry : kernel->GetEntries().const_buffers) {
+ for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()];
- SetupConstBuffer(binding++, buffer, entry);
+ SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
+ use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); // unified offset has no per-stage base here
+ ++binding;
+ }
+ if (use_unified) { // SSBO slot directly after the kernel's global memory entries
+ const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
+ NUM_CONST_BUFFERS_BYTES_PER_STAGE);
}
}
-void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry) {
+void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
+ const Tegra::Engines::ConstBufferInfo& buffer,
+ const ConstBufferEntry& entry, bool use_unified,
+ std::size_t unified_offset) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
- sizeof(float));
+ if (device.UseAssemblyShaders()) {
+ glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
+ }
return;
}
@@ -852,18 +978,39 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
// UBO alignment requirements.
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
- const auto alignment = device.GetUniformBufferAlignment();
- const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
- device.HasFastBufferSubData());
- glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
+ const bool fast_upload = !use_unified && device.HasFastBufferSubData();
+
+ const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
+ const GPUVAddr gpu_addr = buffer.address;
+ auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
+
+ if (device.UseAssemblyShaders()) {
+ UNIMPLEMENTED_IF(use_unified);
+ if (info.offset != 0) {
+ const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
+ glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
+ info.handle = staging_cbuf;
+ info.offset = 0;
+ }
+ glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
+ return;
+ }
+
+ if (use_unified) {
+ glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
+ unified_offset, size);
+ } else {
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
+ }
}
-void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
- u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
+ u32 binding =
+ device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetEntries().global_memory_entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
@@ -872,7 +1019,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
}
}
-void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -889,13 +1036,12 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
- const auto [ssbo, buffer_offset] =
- buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); // info.handle and info.offset feed the bind below
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
static_cast<GLsizeiptr>(size));
}
-void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -908,7 +1054,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
}
}
-void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
@@ -929,19 +1075,15 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
glBindTextureUnit(binding, 0);
return;
}
- glBindTextureUnit(binding, view->GetTexture());
-
- if (view->GetSurfaceParams().IsBuffer()) {
- return;
+ const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
+ glBindTextureUnit(binding, handle);
+ if (!view->GetSurfaceParams().IsBuffer()) {
+ glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
- // Apply swizzle to textures that are not buffers.
- view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
- texture.tic.w_source);
-
- glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
-void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) {
@@ -951,7 +1093,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
}
}
-void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) {
@@ -967,14 +1109,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
return;
}
- if (!tic.IsBuffer()) {
- view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
- }
if (entry.is_written) {
view->MarkAsModified(texture_cache.Tick());
}
- glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
- view->GetFormat());
+ const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
}
void RasterizerOpenGL::SyncViewport() {
@@ -983,6 +1122,26 @@ void RasterizerOpenGL::SyncViewport() {
const auto& regs = gpu.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
+ const bool dirty_clip_control = flags[Dirty::ClipControl];
+
+ if (dirty_clip_control || flags[Dirty::FrontFace]) {
+ flags[Dirty::FrontFace] = false;
+
+ GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
+ if (regs.screen_y_control.triangle_rast_flip != 0 &&
+ regs.viewport_transform[0].scale_y < 0.0f) {
+ switch (mode) {
+ case GL_CW:
+ mode = GL_CCW;
+ break;
+ case GL_CCW:
+ mode = GL_CW;
+ break;
+ }
+ }
+ glFrontFace(mode);
+ }
+
if (dirty_viewport || flags[Dirty::ClipControl]) {
flags[Dirty::ClipControl] = false;
@@ -1019,6 +1178,14 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+
+ if (!GLAD_GL_NV_viewport_swizzle) {
+ continue;
+ }
+ glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
+ MaxwellToGL::ViewportSwizzle(src.swizzle.y),
+ MaxwellToGL::ViewportSwizzle(src.swizzle.z),
+ MaxwellToGL::ViewportSwizzle(src.swizzle.w));
}
}
}
@@ -1072,11 +1239,6 @@ void RasterizerOpenGL::SyncCullMode() {
glDisable(GL_CULL_FACE);
}
}
-
- if (flags[Dirty::FrontFace]) {
- flags[Dirty::FrontFace] = false;
- glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
- }
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
@@ -1447,12 +1609,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
}
+void RasterizerOpenGL::SyncTransformFeedback() {
+ // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
+ // when this is required.
+ const auto& regs = system.GPU().Maxwell3D().regs;
+
+ static constexpr std::size_t STRIDE = 3;
+ std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
+
+ GLint* cursor = attribs.data();
+ GLint* current_stream = streams.data();
+
+ for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = regs.tfb_layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != streams.data()) {
+ // When stepping one stream, push the expected token
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = regs.tfb_varying_locs[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase number of components of the previous attachment
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += STRIDE;
+ }
+ }
+
+ const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
+ const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
+ glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
+ GL_INTERLEAVED_ATTRIBS);
+}
+
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
if (regs.tfb_enabled == 0) {
return;
}
+ if (device.UseAssemblyShaders()) {
+ SyncTransformFeedback();
+ }
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1479,6 +1699,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
static_cast<GLsizeiptr>(size));
}
+ // We may have to call BeginTransformFeedbackNV here since they seem to call different
+ // implementations on Nvidia's driver (the pointer is different) but we are using
+ // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
+ // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
glBeginTransformFeedback(GL_POINTS);
}
@@ -1500,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GLuint handle = transform_feedback_buffers[index].handle;
const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size;
- const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
+ static_cast<GLsizeiptr>(size));
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b94c65907..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_accelerated.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
@@ -56,8 +55,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info, GLShader::ProgramManager& program_manager,
- StateTracker& state_tracker);
+ const Device& device, ScreenInfo& info,
+ ProgramManager& program_manager, StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -100,40 +99,41 @@ private:
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
/// Configures the current constbuffers to use for the draw command.
- void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
+ void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
/// Configures the current constbuffers to use for the kernel invocation.
- void SetupComputeConstBuffers(const Shader& kernel);
+ void SetupComputeConstBuffers(Shader* kernel);
/// Configures a constant buffer.
- void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const ConstBufferEntry& entry);
+ void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
+ const ConstBufferEntry& entry, bool use_unified,
+ std::size_t unified_offset);
/// Configures the current global memory entries to use for the draw command.
- void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
+ void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
/// Configures the current global memory entries to use for the kernel invocation.
- void SetupComputeGlobalMemory(const Shader& kernel);
+ void SetupComputeGlobalMemory(Shader* kernel);
/// Configures a constant buffer.
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
- void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
+ void SetupDrawTextures(std::size_t stage_index, Shader* shader);
/// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader& kernel);
+ void SetupComputeTextures(Shader* kernel);
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const SamplerEntry& entry);
/// Configures images in a graphics shader.
- void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+ void SetupDrawImages(std::size_t stage_index, Shader* shader);
/// Configures images in a compute shader.
- void SetupComputeImages(const Shader& shader);
+ void SetupComputeImages(Shader* shader);
/// Configures an image.
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -201,6 +201,10 @@ private:
/// Syncs the framebuffer sRGB state to match the guest state
void SyncFramebufferSRGB();
+ /// Syncs transform feedback state to match guest state
+ /// @note Only valid on assembly shaders
+ void SyncTransformFeedback();
+
/// Begin a transform feedback
void BeginTransformFeedback(GLenum primitive_mode);
@@ -224,7 +228,7 @@ private:
void SetupShaders(GLenum primitive_mode);
- const Device device;
+ const Device& device;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -236,7 +240,7 @@ private:
Core::System& system;
ScreenInfo& screen_info;
- GLShader::ProgramManager& program_manager;
+ ProgramManager& program_manager;
StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@@ -248,6 +252,13 @@ private:
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
enabled_transform_feedback_buffers;
+ static constexpr std::size_t NUM_CONSTANT_BUFFERS =
+ Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+ Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
+ std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
+ std::size_t current_cbuf = 0;
+ OGLBuffer unified_uniform_buffer;
+
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 97803d480..a787e27d2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -125,6 +125,15 @@ void OGLProgram::Release() {
handle = 0;
}
+void OGLAssemblyProgram::Release() {
+ if (handle == 0) {
+ return;
+ }
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteProgramsARB(1, &handle);
+ handle = 0;
+}
+
void OGLPipeline::Create() {
if (handle != 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index de93f4212..f8b322227 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -167,6 +167,22 @@ public:
GLuint handle = 0;
};
+class OGLAssemblyProgram : private NonCopyable {
+public:
+ OGLAssemblyProgram() = default;
+
+ OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLAssemblyProgram() {
+ Release();
+ }
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
class OGLPipeline : private NonCopyable {
public:
OGLPipeline() = default;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 9759a7078..c6a3bf3a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -29,6 +30,7 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace OpenGL {
@@ -97,6 +99,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
+constexpr GLenum AssemblyEnum(ShaderType shader_type) {
+ switch (shader_type) {
+ case ShaderType::Vertex:
+ return GL_VERTEX_PROGRAM_NV;
+ case ShaderType::TesselationControl:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case ShaderType::TesselationEval:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
+ case ShaderType::Geometry:
+ return GL_GEOMETRY_PROGRAM_NV;
+ case ShaderType::Fragment:
+ return GL_FRAGMENT_PROGRAM_NV;
+ case ShaderType::Compute:
+ return GL_COMPUTE_PROGRAM_NV;
+ }
+ return {};
+}
+
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
@@ -120,18 +140,44 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
return registry;
}
-std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
- u64 unique_identifier, const ShaderIR& ir,
- const Registry& registry, bool hint_retrievable = false) {
+ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
+ const ShaderIR& ir, const Registry& registry,
+ bool hint_retrievable = false) {
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
LOG_INFO(Render_OpenGL, "{}", shader_id);
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
+ auto program = std::make_shared<ProgramHandle>();
+
+ if (device.UseAssemblyShaders()) {
+ const std::string arb =
+ DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
+
+ GLuint& arb_prog = program->assembly_program.handle;
+
+// Commented out functions signal OpenGL errors but are compatible with apitrace.
+// Use them only to capture and replay on apitrace.
+#if 0
+ glGenProgramsNV(1, &arb_prog);
+ glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
+ reinterpret_cast<const GLubyte*>(arb.data()));
+#else
+ glGenProgramsARB(1, &arb_prog);
+ glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
+ static_cast<GLsizei>(arb.size()), arb.data());
+#endif
+ const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+ if (err && *err) {
+ LOG_CRITICAL(Render_OpenGL, "{}", err);
+ LOG_INFO(Render_OpenGL, "\n{}", arb);
+ }
+ } else {
+ const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
+ OGLShader shader;
+ shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
+
+ program->source_program.Create(true, hint_retrievable, shader.handle);
+ }
- auto program = std::make_shared<OGLProgram>();
- program->Create(true, hint_retrievable, shader.handle);
return program;
}
@@ -151,22 +197,26 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, std::shared_ptr<OGLProgram> program)
- : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
- size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
+ ProgramSharedPtr program_)
+ : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
+ handle = program->assembly_program.handle;
+ if (handle == 0) {
+ handle = program->source_program.handle;
+ }
+ ASSERT(handle != 0);
+}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-GLuint CachedShader::GetHandle() const {
+GLuint Shader::GetHandle() const {
DEBUG_ASSERT(registry->IsConsistent());
- return program->handle;
+ return handle;
}
-Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b) {
+std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode code, ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
@@ -191,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(new CachedShader(
- params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
+ return std::unique_ptr<Shader>(new Shader(
+ std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
}
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
+std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code) {
const std::size_t size_in_bytes = code.size() * sizeof(u64);
auto& engine = params.system.GPU().KeplerCompute();
@@ -215,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry));
- return std::shared_ptr<CachedShader>(new CachedShader(
- params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
+ return std::unique_ptr<Shader>(new Shader(std::move(registry),
+ MakeEntries(params.device, ir, ShaderType::Compute),
+ std::move(program)));
}
-Shader CachedShader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes) {
- return std::shared_ptr<CachedShader>(
- new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
- precompiled_shader.entries, precompiled_shader.program));
+std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader) {
+ return std::unique_ptr<Shader>(new Shader(
+ precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device)
- : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
- disk_cache{system} {}
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
+ emu_window{emu_window}, device{device}, disk_cache{system} {}
+
+ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -239,7 +291,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
- const std::vector gl_cache = disk_cache.LoadPrecompiled();
+ std::vector<ShaderDiskCachePrecompiled> gl_cache;
+ if (!device.UseAssemblyShaders()) {
+ // Only load precompiled cache when we are not using assembly shaders
+ gl_cache = disk_cache.LoadPrecompiled();
+ }
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -278,7 +334,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
auto registry = MakeRegistry(entry);
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
if (precompiled_entry) {
// If the shader is precompiled, attempt to load it with
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@@ -294,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
PrecompiledShader shader;
shader.program = std::move(program);
shader.registry = std::move(registry);
- shader.entries = MakeEntries(ir);
+ shader.entries = MakeEntries(device, ir, entry.type);
std::scoped_lock lock{mutex};
if (callback) {
@@ -332,6 +388,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
return;
}
+ if (device.UseAssemblyShaders()) {
+ // Don't store precompiled binaries for assembly shaders.
+ return;
+ }
+
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
@@ -339,7 +400,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const u64 id = (*transferable)[i].unique_identifier;
const auto it = find_precompiled(id);
if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->handle;
+ const GLuint program = runtime_cache.at(id).program->source_program.handle;
disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
@@ -350,7 +411,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
-std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
+ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats) {
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@@ -358,15 +419,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
return {};
}
- auto program = std::make_shared<OGLProgram>();
- program->handle = glCreateProgram();
- glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(program->handle, precompiled_entry.binary_format,
- precompiled_entry.binary.data(),
+ auto program = std::make_shared<ProgramHandle>();
+ GLuint& handle = program->source_program.handle;
+ handle = glCreateProgram();
+ glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
static_cast<GLsizei>(precompiled_entry.binary.size()));
GLint link_status;
- glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
+ glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
@@ -375,7 +436,7 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
return program;
}
-Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -385,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
// Look up shader in the cache based on address
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
- Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
- if (shader) {
+ if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
@@ -400,62 +460,64 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const u8* host_ptr_b = memory_manager.GetPointer(address_b);
code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
}
+ const std::size_t code_size = code.size() * sizeof(u64);
- const auto unique_identifier = GetUniqueIdentifier(
+ const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
- std::move(code_b));
+ shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ shader = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = shader.get();
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, code_size);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
- return last_shaders[static_cast<std::size_t>(program)] = shader;
+ return last_shaders[static_cast<std::size_t>(program)] = result;
}
-Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
+Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
- auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
- if (kernel) {
+ if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
- const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
+ ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+ const std::size_t code_size{code.size() * sizeof(u64)};
+ const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const ShaderParameters params{system, disk_cache, device,
*cpu_addr, host_ptr, unique_identifier};
+ std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) {
- kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
+ kernel = Shader::CreateKernelFromMemory(params, std::move(code));
} else {
- const std::size_t size_in_bytes = code.size() * sizeof(u64);
- kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
+ kernel = Shader::CreateFromCache(params, found->second);
}
+ Shader* const result = kernel.get();
if (cpu_addr) {
- Register(kernel);
+ Register(std::move(kernel), *cpu_addr, code_size);
} else {
- null_kernel = kernel;
+ null_kernel = std::move(kernel);
}
- return kernel;
+ return result;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 91690b470..994aaeaf2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
#include "common/common_types.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -35,16 +35,19 @@ class EmuWindow;
namespace OpenGL {
-class CachedShader;
class Device;
class RasterizerOpenGL;
-struct UnspecializedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct ProgramHandle {
+ OGLProgram source_program;
+ OGLAssemblyProgram assembly_program;
+};
+using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
+
struct PrecompiledShader {
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
};
@@ -58,64 +61,56 @@ struct ShaderParameters {
u64 unique_identifier;
};
-class CachedShader final : public RasterizerCacheObject {
+class Shader final {
public:
- ~CachedShader();
+ ~Shader();
/// Gets the GL program handle for the shader
GLuint GetHandle() const;
- /// Returns the size in bytes of the shader
- std::size_t GetSizeInBytes() const override {
- return size_in_bytes;
- }
-
/// Gets the shader entries for the shader
const ShaderEntries& GetEntries() const {
return entries;
}
- static Shader CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+ static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode program_code,
+ ProgramCode program_code_b);
+ static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
+ ProgramCode code);
- static Shader CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader,
- std::size_t size_in_bytes);
+ static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader);
private:
- explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
- std::shared_ptr<VideoCommon::Shader::Registry> registry,
- ShaderEntries entries, std::shared_ptr<OGLProgram> program);
+ explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
+ ProgramSharedPtr program);
std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries;
- std::size_t size_in_bytes = 0;
- std::shared_ptr<OGLProgram> program;
+ ProgramSharedPtr program;
+ GLuint handle = 0;
};
-class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
+class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device);
+ ~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
- Shader GetStageProgram(Maxwell::ShaderProgram program);
+ Shader* GetStageProgram(Maxwell::ShaderProgram program);
/// Gets a compute kernel in the passed address
- Shader GetComputeKernel(GPUVAddr code_addr);
-
-protected:
- // We do not have to flush this cache as things in it are never modified by us.
- void FlushObjectInner(const Shader& object) override {}
+ Shader* GetComputeKernel(GPUVAddr code_addr);
private:
- std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
+ ProgramSharedPtr GeneratePrecompiledProgram(
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
@@ -125,10 +120,10 @@ private:
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 99fd4ae2c..2c49aeaac 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::PixelImap;
using Tegra::Shader::Register;
+using Tegra::Shader::TextureType;
using VideoCommon::Shader::BuildTransformFeedback;
using VideoCommon::Shader::Registry;
@@ -61,8 +62,8 @@ struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
- static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
+/// Number of 32-bit scalars covered by Maxwell::MaxConstBufferSize bytes.
+constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
+/// Number of uvec4 elements covered by the same size (four scalars per element).
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / 4;
constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
#define ftou floatBitsToUint
@@ -402,6 +403,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
+/// Returns true when the shader declares more constant buffers than the stage has usable uniform
+/// buffer bindings. In that case constant buffers are declared as a single "UnifiedUniforms"
+/// storage buffer instead of one uniform block per constant buffer.
+/// @param device Host device queried for the per-stage uniform buffer limit.
+/// @param ir     Shader IR whose constant buffer list is counted.
+/// @param stage  Shader stage the limit is queried for.
+bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
+    const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
+    // One UBO is wasted for emulation, so only "max - 1" are usable. Testing ">=" against the raw
+    // maximum is the same comparison, but it cannot wrap around if the device reports zero.
+    const u32 max_ubos = device.GetMaxUniformBuffers(stage);
+    return num_ubos >= max_ubos;
+}
+
struct GenericVaryingDescription {
std::string name;
u8 first_element = 0;
@@ -412,8 +420,9 @@ class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
ShaderType stage, std::string_view identifier, std::string_view suffix)
- : device{device}, ir{ir}, registry{registry}, stage{stage},
- identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
+ suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
+ UseUnifiedUniforms(device, ir, stage)} {
if (stage != ShaderType::Compute) {
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
}
@@ -518,6 +527,9 @@ private:
if (device.HasImageLoadFormatted()) {
code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
+ if (device.HasTextureShadowLod()) {
+ code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
+ }
if (device.HasWarpIntrinsics()) {
code.AddLine("#extension GL_NV_gpu_shader5 : require");
code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -618,7 +630,9 @@ private:
break;
}
}
- if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
+
+ if (stage != ShaderType::Geometry &&
+ (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
@@ -647,6 +661,16 @@ private:
--code.scope;
code.AddLine("}};");
code.AddNewLine();
+
+ if (stage == ShaderType::Geometry) {
+ if (ir.UsesLayer()) {
+ code.AddLine("out int gl_Layer;");
+ }
+ if (ir.UsesViewportIndex()) {
+ code.AddLine("out int gl_ViewportIndex;");
+ }
+ }
+ code.AddNewLine();
}
void DeclareRegisters() {
@@ -834,12 +858,24 @@ private:
}
void DeclareConstantBuffers() {
+ if (use_unified_uniforms) {
+ const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
+ static_cast<u32>(ir.GetGlobalMemory().size());
+ code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
+ binding);
+ code.AddLine(" uint cbufs[];");
+ code.AddLine("}};");
+ code.AddNewLine();
+ return;
+ }
+
u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& buffers : ir.GetConstantBuffers()) {
- const auto index = buffers.first;
+ for (const auto [index, info] : ir.GetConstantBuffers()) {
+ const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
+ const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
+ code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
code.AddLine("}};");
code.AddNewLine();
}
@@ -877,13 +913,13 @@ private:
return "samplerBuffer";
}
switch (sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
+ case TextureType::Texture1D:
return "sampler1D";
- case Tegra::Shader::TextureType::Texture2D:
+ case TextureType::Texture2D:
return "sampler2D";
- case Tegra::Shader::TextureType::Texture3D:
+ case TextureType::Texture3D:
return "sampler3D";
- case Tegra::Shader::TextureType::TextureCube:
+ case TextureType::TextureCube:
return "samplerCube";
default:
UNREACHABLE();
@@ -1038,42 +1074,51 @@ private:
if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
+ const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
+
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
+ if (use_unified_uniforms) {
+ return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
+ Type::Uint};
+ } else {
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ Type::Uint};
+ }
}
- if (std::holds_alternative<OperationNode>(*offset)) {
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
+ // Indirect access
+ if (use_unified_uniforms) {
+ return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
+ Visit(offset).AsUint()),
+ Type::Uint};
+ }
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
+ const std::string final_offset = code.GenerateTemporary();
+ code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
- // AMD's proprietary GLSL compiler emits ill code for variable component access.
- // To bypass this driver bug generate 4 ifs, one per each component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
- pack, GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
+ if (!device.HasComponentIndexingBug()) {
+ return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset),
+ Type::Uint};
}
- UNREACHABLE_MSG("Unmanaged offset node type");
+ // AMD's proprietary GLSL compiler emits ill code for variable component access.
+ // To bypass this driver bug generate 4 ifs, one per each component.
+ const std::string pack = code.GenerateTemporary();
+ code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ final_offset);
+
+ const std::string result = code.GenerateTemporary();
+ code.AddLine("uint {};", result);
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
+ GetSwizzle(swizzle));
+ }
+ return {result, Type::Uint};
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
@@ -1339,8 +1384,19 @@ private:
const std::size_t count = operation.GetOperandsCount();
const bool has_array = meta->sampler.is_array;
const bool has_shadow = meta->sampler.is_shadow;
+ const bool workaround_lod_array_shadow_as_grad =
+ !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube);
+
+ std::string expr = "texture";
+
+ if (workaround_lod_array_shadow_as_grad) {
+ expr += "Grad";
+ } else {
+ expr += function_suffix;
+ }
- std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
} else if (!meta->ptp.empty()) {
@@ -1374,6 +1430,16 @@ private:
expr += ')';
}
+ if (workaround_lod_array_shadow_as_grad) {
+ switch (meta->sampler.type) {
+ case TextureType::Texture2D:
+ return expr + ", vec2(0.0), vec2(0.0))";
+ case TextureType::TextureCube:
+ return expr + ", vec3(0.0), vec3(0.0))";
+ }
+ UNREACHABLE();
+ }
+
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
@@ -1538,7 +1604,9 @@ private:
Expression target;
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
+ // Writing to Register::ZeroIndex is a no op but we still have to visit the source
+ // as it might have side effects.
+ code.AddLine("{};", Visit(src).GetCode());
return {};
}
target = {GetRegister(gpr->GetIndex()), Type::Float};
@@ -1840,34 +1908,40 @@ private:
Type::HalfFloat};
}
- template <Type type>
- Expression LogicalLessThan(Operation operation) {
- return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
- }
+ template <const std::string_view& op, Type type, bool unordered = false>
+ Expression Comparison(Operation operation) {
+ static_assert(!unordered || type == Type::Float);
- template <Type type>
- Expression LogicalEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
- }
-
- template <Type type>
- Expression LogicalLessEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
- }
+ const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
- template <Type type>
- Expression LogicalGreaterThan(Operation operation) {
- return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
+ if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
+ // GLSL's operator!=(float, float) doesn't seem to be ordered. This happens on both AMD's
+ // and Nvidia's proprietary stacks. Manually force an ordered comparison.
+ return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
+ VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
+ }
+ if constexpr (!unordered) {
+ return expr;
+ }
+ // Unordered comparisons are always true for NaN operands.
+ return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
+ VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::Bool};
}
- template <Type type>
- Expression LogicalNotEqual(Operation operation) {
- return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
+    /// Builds a boolean GLSL expression that holds when neither float operand is NaN.
+    Expression FOrdered(Operation operation) {
+        const std::string lhs = VisitOperand(operation, 0).AsFloat();
+        const std::string rhs = VisitOperand(operation, 1).AsFloat();
+        return {fmt::format("(!isnan({}) && !isnan({}))", lhs, rhs), Type::Bool};
     }
- template <Type type>
- Expression LogicalGreaterEqual(Operation operation) {
- return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
+    /// Builds a boolean GLSL expression that holds when at least one float operand is NaN.
+    Expression FUnordered(Operation operation) {
+        const std::string lhs = VisitOperand(operation, 0).AsFloat();
+        const std::string rhs = VisitOperand(operation, 1).AsFloat();
+        return {fmt::format("(isnan({}) || isnan({}))", lhs, rhs), Type::Bool};
     }
Expression LogicalAddCarry(Operation operation) {
@@ -1992,8 +2066,19 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(
- operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ std::string expr{};
+
+ if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
+ ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
+ meta->sampler.type == TextureType::TextureCube)) {
+ LOG_ERROR(Render_OpenGL,
+ "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
+ expr = GenerateTexture(operation, "Lod", {});
+ } else {
+ expr = GenerateTexture(operation, "Lod",
+ {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
+ }
+
if (meta->sampler.is_shadow) {
expr = "vec4(" + expr + ')';
}
@@ -2303,6 +2388,18 @@ private:
return {"gl_SubGroupInvocationARB", Type::Uint};
}
+    /// Produces the thread mask built-in selected by `comparison` (spliced into the GLSL
+    /// identifier). Prefers the NV warp intrinsic, falls back to the ARB ballot built-in, and
+    /// logs an error and yields a zero mask when the device offers neither.
+    template <const std::string_view& comparison>
+    Expression ThreadMask(Operation) {
+        std::string mask;
+        if (device.HasWarpIntrinsics()) {
+            mask = fmt::format("gl_Thread{}MaskNV", comparison);
+        } else if (device.HasShaderBallot()) {
+            mask = fmt::format("uint(gl_SubGroup{}MaskARB)", comparison);
+        } else {
+            LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
+            mask = "0U";
+        }
+        return {std::move(mask), Type::Uint};
+    }
+
Expression ShuffleIndexed(Operation operation) {
std::string value = VisitOperand(operation, 0).AsFloat();
@@ -2315,7 +2412,21 @@ private:
return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
}
- Expression MemoryBarrierGL(Operation) {
+    /// Emits a GLSL barrier() statement when the IR reports the shader as decompiled; otherwise
+    /// only logs an error. Returns an empty expression either way.
+    Expression Barrier(Operation) {
+        if (ir.IsDecompiled()) {
+            code.AddLine("barrier();");
+        } else {
+            LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
+        }
+        return {};
+    }
+
+ /// Emits a GLSL groupMemoryBarrier() statement; the operation yields no value.
+ Expression MemoryBarrierGroup(Operation) {
+ code.AddLine("groupMemoryBarrier();");
+ return {};
+ }
+
+ /// Emits a GLSL memoryBarrier() statement; the operation yields no value.
+ Expression MemoryBarrierGlobal(Operation) {
code.AddLine("memoryBarrier();");
return {};
}
@@ -2324,6 +2435,19 @@ private:
Func() = delete;
~Func() = delete;
+ static constexpr std::string_view LessThan = "<";
+ static constexpr std::string_view Equal = "==";
+ static constexpr std::string_view LessEqual = "<=";
+ static constexpr std::string_view GreaterThan = ">";
+ static constexpr std::string_view NotEqual = "!=";
+ static constexpr std::string_view GreaterEqual = ">=";
+
+ static constexpr std::string_view Eq = "Eq";
+ static constexpr std::string_view Ge = "Ge";
+ static constexpr std::string_view Gt = "Gt";
+ static constexpr std::string_view Le = "Le";
+ static constexpr std::string_view Lt = "Lt";
+
static constexpr std::string_view Add = "Add";
static constexpr std::string_view Min = "Min";
static constexpr std::string_view Max = "Max";
@@ -2425,27 +2549,34 @@ private:
&GLSLDecompiler::LogicalPick2,
&GLSLDecompiler::LogicalAnd2,
- &GLSLDecompiler::LogicalLessThan<Type::Float>,
- &GLSLDecompiler::LogicalEqual<Type::Float>,
- &GLSLDecompiler::LogicalLessEqual<Type::Float>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Float>,
- &GLSLDecompiler::LogicalNotEqual<Type::Float>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Float>,
- &GLSLDecompiler::LogicalFIsNan,
-
- &GLSLDecompiler::LogicalLessThan<Type::Int>,
- &GLSLDecompiler::LogicalEqual<Type::Int>,
- &GLSLDecompiler::LogicalLessEqual<Type::Int>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Int>,
- &GLSLDecompiler::LogicalNotEqual<Type::Int>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Int>,
-
- &GLSLDecompiler::LogicalLessThan<Type::Uint>,
- &GLSLDecompiler::LogicalEqual<Type::Uint>,
- &GLSLDecompiler::LogicalLessEqual<Type::Uint>,
- &GLSLDecompiler::LogicalGreaterThan<Type::Uint>,
- &GLSLDecompiler::LogicalNotEqual<Type::Uint>,
- &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
+ &GLSLDecompiler::FOrdered,
+ &GLSLDecompiler::FUnordered,
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
+
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
+
+ &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
+ &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
&GLSLDecompiler::LogicalAddCarry,
@@ -2534,9 +2665,16 @@ private:
&GLSLDecompiler::VoteEqual,
&GLSLDecompiler::ThreadId,
+ &GLSLDecompiler::ThreadMask<Func::Eq>,
+ &GLSLDecompiler::ThreadMask<Func::Ge>,
+ &GLSLDecompiler::ThreadMask<Func::Gt>,
+ &GLSLDecompiler::ThreadMask<Func::Le>,
+ &GLSLDecompiler::ThreadMask<Func::Lt>,
&GLSLDecompiler::ShuffleIndexed,
- &GLSLDecompiler::MemoryBarrierGL,
+ &GLSLDecompiler::Barrier,
+ &GLSLDecompiler::MemoryBarrierGroup,
+ &GLSLDecompiler::MemoryBarrierGlobal,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2649,6 +2787,7 @@ private:
const std::string_view identifier;
const std::string_view suffix;
const Header header;
+ const bool use_unified_uniforms;
std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
@@ -2844,7 +2983,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
-ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
+ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2865,6 +3004,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
}
entries.shader_length = ir.GetLength();
+ entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
return entries;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e8a178764..451c9689a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -53,11 +53,13 @@ struct ShaderEntries {
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
- u32 clip_distances{};
std::size_t shader_length{};
+ u32 clip_distances{};
+ bool use_unified_uniforms{};
};
-ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
+ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ Tegra::Engines::ShaderType stage);
std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
const VideoCommon::Shader::Registry& registry,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
namespace {
+using VideoCommon::Shader::SeparateSamplerKey;
+
using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
u32 value = 0;
};
-struct BoundSamplerKey {
+struct BoundSamplerEntry {
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-struct BindlessSamplerKey {
+/// Flat disk-cache record for one separate sampler. cbuf1/cbuf2 and offset1/offset2 map to the
+/// `buffers` and `offsets` pairs of SeparateSamplerKey. Records are read and written as raw
+/// arrays, so member order and types define the on-disk layout.
+struct SeparateSamplerEntry {
+ u32 cbuf1 = 0;
+ u32 cbuf2 = 0;
+ u32 offset1 = 0;
+ u32 offset2 = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
+};
+
+struct BindlessSamplerEntry {
u32 cbuf = 0;
u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
-constexpr u32 NativeVersion = 20;
+constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
u32 texture_handler_size_value;
u32 num_keys;
u32 num_bound_samplers;
+ u32 num_separate_samplers;
u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_separate_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false;
}
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
}
std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+ std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
+ std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() ||
+ file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
+ flat_separate_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) {
return false;
}
- for (const auto& key : flat_keys) {
- keys.insert({{key.cbuf, key.offset}, key.value});
+ for (const auto& entry : flat_keys) {
+ keys.insert({{entry.cbuf, entry.offset}, entry.value});
}
- for (const auto& key : flat_bound_samplers) {
- bound_samplers.emplace(key.offset, key.sampler);
+ for (const auto& entry : flat_bound_samplers) {
+ bound_samplers.emplace(entry.offset, entry.sampler);
}
- for (const auto& key : flat_bindless_samplers) {
- bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ for (const auto& entry : flat_separate_samplers) {
+ SeparateSamplerKey key;
+ key.buffers = {entry.cbuf1, entry.cbuf2};
+ key.offsets = {entry.offset1, entry.offset2};
+ separate_samplers.emplace(key, entry.sampler);
+ }
+ for (const auto& entry : flat_bindless_samplers) {
+ bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
}
return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false;
}
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
}
- std::vector<BoundSamplerKey> flat_bound_samplers;
+ std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+ flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
+ }
+
+ std::vector<SeparateSamplerEntry> flat_separate_samplers;
+ flat_separate_samplers.reserve(separate_samplers.size());
+ for (const auto& [key, sampler] : separate_samplers) {
+ SeparateSamplerEntry entry;
+ std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
+ std::tie(entry.offset1, entry.offset2) = key.offsets;
+ entry.sampler = sampler;
+ flat_separate_samplers.push_back(entry);
}
- std::vector<BindlessSamplerKey> flat_bindless_samplers;
+ std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back(
- BindlessSamplerKey{address.first, address.second, sampler});
+ BindlessSamplerEntry{address.first, address.second, sampler});
}
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() &&
+ file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
+ flat_separate_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size();
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 9c7b0adbd..8e754fa90 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -6,45 +6,109 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
-ProgramManager::ProgramManager() = default;
+ProgramManager::ProgramManager(const Device& device) {
+ use_assembly_programs = device.UseAssemblyShaders();
+ if (use_assembly_programs) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ } else {
+ graphics_pipeline.Create();
+ glBindProgramPipeline(graphics_pipeline.handle);
+ }
+}
ProgramManager::~ProgramManager() = default;
-void ProgramManager::Create() {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
+/// Binds `program` for compute work: as an NV compute assembly program in assembly mode, otherwise
+/// through glUseProgram (which also marks the graphics pipeline as no longer bound).
+void ProgramManager::BindCompute(GLuint program) {
+    if (use_assembly_programs) {
+        glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+        return;
+    }
+    is_graphics_bound = false;
+    glUseProgram(program);
 }
void ProgramManager::BindGraphicsPipeline() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
+ if (use_assembly_programs) {
+ UpdateAssemblyPrograms();
+ } else {
+ UpdateSourcePrograms();
}
+}
- // Avoid updating the pipeline when values have no changed
- if (old_state == current_state) {
- return;
+/// Binds a host-owned pipeline object. Beforehand, assembly mode disables an enabled geometry
+/// program target (clearing the cached geometry handle so the next update rebinds it), and GLSL
+/// mode unbinds any program left active by a compute dispatch.
+void ProgramManager::BindHostPipeline(GLuint pipeline) {
+    if (use_assembly_programs && geometry_enabled) {
+        geometry_enabled = false;
+        old_state.geometry = 0;
+        glDisable(GL_GEOMETRY_PROGRAM_NV);
+    } else if (!use_assembly_programs && !is_graphics_bound) {
+        glUseProgram(0);
     }
+    glBindProgramPipeline(pipeline);
+}
- // Workaround for AMD bug
- static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
- GL_FRAGMENT_SHADER_BIT};
- const GLuint handle = graphics_pipeline.handle;
- glUseProgramStages(handle, all_used_stages, 0);
- glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
- glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
- glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+/// Rebinds the pipeline used for guest rendering: none in assembly mode, otherwise the manager's
+/// own graphics pipeline object.
+void ProgramManager::RestoreGuestPipeline() {
+    const GLuint guest_pipeline = use_assembly_programs ? 0 : graphics_pipeline.handle;
+    glBindProgramPipeline(guest_pipeline);
+}
+
+/// Synchronizes the bound NV assembly programs with current_state, touching only the stages whose
+/// handle differs from the one applied last time.
+void ProgramManager::UpdateAssemblyPrograms() {
+    const auto sync_stage = [](GLenum target, bool& is_enabled, GLuint wanted, GLuint applied) {
+        if (wanted == applied) {
+            return;
+        }
+        if (wanted != 0) {
+            // A program is requested: make sure the target is enabled before binding it.
+            if (!is_enabled) {
+                is_enabled = true;
+                glEnable(target);
+            }
+            glBindProgramARB(target, wanted);
+        } else if (is_enabled) {
+            // Handle zero means the stage is unused: disable the target instead of binding.
+            is_enabled = false;
+            glDisable(target);
+        }
+    };
+
+    sync_stage(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
+    sync_stage(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
+               old_state.geometry);
+    sync_stage(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
+               old_state.fragment);
     old_state = current_state;
 }
-void ProgramManager::BindComputeShader(GLuint program) {
- is_graphics_bound = false;
- glUseProgram(program);
+/// Synchronizes the GLSL graphics pipeline with current_state. Clears any program bound through
+/// glUseProgram first, then reattaches only the stages whose program handle changed.
+void ProgramManager::UpdateSourcePrograms() {
+    if (!is_graphics_bound) {
+        glUseProgram(0);
+        is_graphics_bound = true;
+    }
+
+    const GLuint pipeline = graphics_pipeline.handle;
+    if (current_state.vertex != old_state.vertex) {
+        glUseProgramStages(pipeline, GL_VERTEX_SHADER_BIT, current_state.vertex);
+    }
+    if (current_state.geometry != old_state.geometry) {
+        glUseProgramStages(pipeline, GL_GEOMETRY_SHADER_BIT, current_state.geometry);
+    }
+    if (current_state.fragment != old_state.fragment) {
+        glUseProgramStages(pipeline, GL_FRAGMENT_SHADER_BIT, current_state.fragment);
+    }
+
+    old_state = current_state;
 }
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
@@ -54,4 +118,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index d2e47f2a9..0f03b4f12 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,7 +11,9 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
+
+class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
class ProgramManager {
public:
- explicit ProgramManager();
+ explicit ProgramManager(const Device& device);
~ProgramManager();
- void Create();
+ /// Binds a compute program
+ void BindCompute(GLuint program);
- /// Updates the graphics pipeline and binds it.
+ /// Updates bound programs.
void BindGraphicsPipeline();
- /// Binds a compute shader.
- void BindComputeShader(GLuint program);
+ /// Binds an OpenGL pipeline object unsynchronized with the guest state.
+ void BindHostPipeline(GLuint pipeline);
+
+ /// Rewinds BindHostPipeline state changes.
+ void RestoreGuestPipeline();
void UseVertexShader(GLuint program) {
- current_state.vertex_shader = program;
+ current_state.vertex = program;
}
void UseGeometryShader(GLuint program) {
- current_state.geometry_shader = program;
+ current_state.geometry = program;
}
void UseFragmentShader(GLuint program) {
- current_state.fragment_shader = program;
+ current_state.fragment = program;
}
private:
struct PipelineState {
- bool operator==(const PipelineState& rhs) const noexcept {
- return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
- geometry_shader == rhs.geometry_shader;
- }
-
- bool operator!=(const PipelineState& rhs) const noexcept {
- return !operator==(rhs);
- }
-
- GLuint vertex_shader = 0;
- GLuint fragment_shader = 0;
- GLuint geometry_shader = 0;
+ GLuint vertex = 0;
+ GLuint geometry = 0;
+ GLuint fragment = 0;
};
+ /// Update NV_gpu_program5 programs.
+ void UpdateAssemblyPrograms();
+
+ /// Update GLSL programs.
+ void UpdateSourcePrograms();
+
OGLPipeline graphics_pipeline;
- OGLPipeline compute_pipeline;
+
PipelineState current_state;
PipelineState old_state;
+
+ bool use_assembly_programs = false;
+
bool is_graphics_bound = true;
+
+ bool vertex_enabled = false;
+ bool geometry_enabled = false;
+ bool fragment_enabled = false;
};
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 6ec328c53..3655ff629 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,11 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <deque>
+#include <tuple>
#include <vector>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
-OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent,
- bool use_persistent)
+OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
: buffer_size(size) {
gl_buffer.Create();
@@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
allocate_size *= 2;
}
- if (use_persistent) {
- persistent = true;
- coherent = prefer_coherent;
- const GLbitfield flags =
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
- glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
- mapped_ptr = static_cast<u8*>(glMapNamedBufferRange(
- gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
- } else {
- glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
+ static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
+ glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+ mapped_ptr = static_cast<u8*>(
+ glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
+
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
}
}
OGLStreamBuffer::~OGLStreamBuffer() {
- if (persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
- }
+ glUnmapNamedBuffer(gl_buffer.handle);
gl_buffer.Release();
}
-GLuint OGLStreamBuffer::GetHandle() const {
- return gl_buffer.handle;
-}
-
-GLsizeiptr OGLStreamBuffer::GetSize() const {
- return buffer_size;
-}
-
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= buffer_size);
ASSERT(alignment <= buffer_size);
@@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
bool invalidate = false;
if (buffer_pos + size > buffer_size) {
+ MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
+ glInvalidateBufferData(gl_buffer.handle);
+
buffer_pos = 0;
invalidate = true;
-
- if (persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
- }
}
- if (invalidate || !persistent) {
- MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
- GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
- (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
- (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
- mapped_ptr = static_cast<u8*>(
- glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
- mapped_offset = buffer_pos;
- }
-
- return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
+ return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
}
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
- if (!coherent && size > 0) {
- glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size);
- }
-
- if (!persistent) {
- glUnmapNamedBuffer(gl_buffer.handle);
+ if (size > 0) {
+ glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
}
buffer_pos += size;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index f8383cbd4..307a67113 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,15 +11,13 @@
namespace OpenGL {
+class Device;
+
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false,
- bool use_persistent = true);
+ explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
~OGLStreamBuffer();
- GLuint GetHandle() const;
- GLsizeiptr GetSize() const;
-
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
@@ -32,15 +30,24 @@ public:
void Unmap(GLsizeiptr size);
+ GLuint Handle() const {
+ return gl_buffer.handle;
+ }
+
+ u64 Address() const {
+ return gpu_address;
+ }
+
+ GLsizeiptr Size() const noexcept {
+ return buffer_size;
+ }
+
private:
OGLBuffer gl_buffer;
- bool coherent = false;
- bool persistent = false;
-
+ GLuint64EXT gpu_address = 0;
GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
- GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 2729d1265..61505879b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
namespace {
struct FormatTuple {
- GLint internal_format;
+ GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
@@ -83,6 +83,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG8UI
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
@@ -237,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
return texture;
}
+constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
+ SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
@@ -256,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
target = GetTextureTarget(params.target);
texture = CreateTexture(params, target, internal_format, texture_buffer);
DecorateSurfaceName();
- main_view = CreateViewInner(
- ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
- true);
+
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+
+ main_view =
+ CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
}
CachedSurface::~CachedSurface() = default;
@@ -380,7 +392,7 @@ void CachedSurface::DecorateSurfaceName() {
}
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
- LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
+ LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
}
View CachedSurface::CreateView(const ViewParams& view_key) {
@@ -396,32 +408,33 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
}
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
- const bool is_proxy)
- : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
- target = GetTextureTarget(params.target);
- format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
+ bool is_proxy)
+ : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
+ target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
if (!is_proxy) {
- texture_view = CreateTextureView();
+ main_view = CreateTextureView();
}
- swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
}
CachedSurfaceView::~CachedSurfaceView() = default;
-void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
+void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
ASSERT(params.num_levels == 1);
+ if (params.target == SurfaceTarget::Texture3D) {
+ if (params.num_layers > 1) {
+ ASSERT(params.base_layer == 0);
+ glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
+ } else {
+ glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
+ params.base_level, params.base_layer);
+ }
+ return;
+ }
+
if (params.num_layers > 1) {
- // Layered framebuffer attachments
UNIMPLEMENTED_IF(params.base_layer != 0);
-
- switch (params.target) {
- case SurfaceTarget::Texture2DArray:
- glFramebufferTexture(target, attachment, GetTexture(), 0);
- break;
- default:
- UNIMPLEMENTED();
- }
+ glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
return;
}
@@ -429,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
const GLuint texture = surface.GetTexture();
switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
+ glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, texture, params.base_level,
+ glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
@@ -446,35 +459,62 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
}
}
-void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
+GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
SwizzleSource z_source, SwizzleSource w_source) {
- u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (new_swizzle == swizzle)
- return;
- swizzle = new_swizzle;
- const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source),
- GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
- const GLuint handle = GetTexture();
- const PixelFormat format = surface.GetSurfaceParams().pixel_format;
- switch (format) {
+ if (GetSurfaceParams().IsBuffer()) {
+ return GetTexture();
+ }
+ const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+ if (current_swizzle == new_swizzle) {
+ return current_view;
+ }
+ current_swizzle = new_swizzle;
+
+ const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
+ OGLTextureView& view = entry->second;
+ if (!is_cache_miss) {
+ current_view = view.handle;
+ return view.handle;
+ }
+ view = CreateTextureView();
+ current_view = view.handle;
+
+ std::array swizzle{x_source, y_source, z_source, w_source};
+
+ switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
case PixelFormat::Z24S8:
case PixelFormat::Z32FS8:
case PixelFormat::S8Z24:
- glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+ UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
+ glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
GetComponent(format, x_source == SwizzleSource::R));
- break;
- default:
- glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+
+ // Make sure we sample the first component
+ std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
+ return value == SwizzleSource::G ? SwizzleSource::R : value;
+ });
+ [[fallthrough]];
+ default: {
+ const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
+ GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
+ glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
break;
}
+ }
+ return view.handle;
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
OGLTextureView texture_view;
texture_view.Create();
- glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
- params.num_levels, params.base_layer, params.num_layers);
+ if (target == GL_TEXTURE_3D) {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, 0, 1);
+ } else {
+ glTextureView(texture_view.handle, target, surface.texture.handle, format,
+ params.base_level, params.num_levels, params.base_layer, params.num_layers);
+ }
ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
@@ -517,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
- UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
- UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
+ UNIMPLEMENTED_IF(src_params.depth != 1);
+ UNIMPLEMENTED_IF(dst_params.depth != 1);
state_tracker.NotifyScissor0();
state_tracker.NotifyFramebuffer();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 02d9981a1..bfc4ddf5d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,10 +80,12 @@ public:
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
~CachedSurfaceView();
- /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
- void Attach(GLenum attachment, GLenum target) const;
+ /// @brief Attaches this texture view to the currently bound fb_target framebuffer
+ /// @param attachment Attachment to bind textures to
+ /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
+ void Attach(GLenum attachment, GLenum fb_target) const;
- void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
+ GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
@@ -98,7 +100,7 @@ public:
if (is_proxy) {
return surface.GetTexture();
}
- return texture_view.handle;
+ return main_view.handle;
}
GLenum GetFormat() const {
@@ -110,23 +112,19 @@ public:
}
private:
- u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) const {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
OGLTextureView CreateTextureView() const;
CachedSurface& surface;
- GLenum target{};
- GLenum format{};
+ const GLenum format;
+ const GLenum target;
+ const bool is_proxy;
+
+ std::unordered_map<u32, OGLTextureView> view_cache;
+ OGLTextureView main_view;
- OGLTextureView texture_view;
- u32 swizzle{};
- bool is_proxy{};
+ // Use an invalid default so it always fails the comparison test
+ u32 current_swizzle = 0xffffffff;
+ GLuint current_view = 0;
};
class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 2c0c77c28..774e70a5b 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,10 +24,11 @@ namespace MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
+inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
- case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -46,12 +47,11 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
- case Maxwell::VertexAttribute::Type::SignedInt:
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -70,10 +70,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
+ break;
case Maxwell::VertexAttribute::Type::Float:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_16:
@@ -86,46 +84,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
}
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_UNSIGNED_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_UNSIGNED_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
- }
- case Maxwell::VertexAttribute::Type::SignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_SHORT;
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
- return {};
- }
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
- return {};
+ break;
}
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
+ attrib.SizeString());
+ return {};
}
inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -137,8 +101,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
case Maxwell::IndexFormat::UnsignedInt:
return GL_UNSIGNED_INT;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
- UNREACHABLE();
+ UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
return {};
}
@@ -180,33 +143,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
}
inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
- Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
+ Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
switch (filter_mode) {
- case Tegra::Texture::TextureFilter::Linear: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Nearest:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_LINEAR;
+ return GL_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_LINEAR_MIPMAP_NEAREST;
+ return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_LINEAR_MIPMAP_LINEAR;
+ return GL_NEAREST_MIPMAP_LINEAR;
}
break;
- }
- case Tegra::Texture::TextureFilter::Nearest: {
- switch (mip_filter_mode) {
+ case Tegra::Texture::TextureFilter::Linear:
+ switch (mipmap_filter_mode) {
case Tegra::Texture::TextureMipmapFilter::None:
- return GL_NEAREST;
+ return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_NEAREST_MIPMAP_NEAREST;
+ return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_NEAREST_MIPMAP_LINEAR;
+ return GL_LINEAR_MIPMAP_LINEAR;
}
break;
}
- }
- LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
- return GL_LINEAR;
+ UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
+ static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
+ return GL_NEAREST;
}
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -229,10 +191,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
} else {
return GL_MIRROR_CLAMP_TO_EDGE;
}
- default:
- LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
- return GL_REPEAT;
}
+ UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -254,8 +215,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
- static_cast<u32>(func));
+ UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
return GL_GREATER;
}
@@ -277,7 +237,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::MaxGL:
return GL_MAX;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+ UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
return GL_FUNC_ADD;
}
@@ -341,7 +301,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+ UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
return GL_ZERO;
}
@@ -361,7 +321,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
return GL_ZERO;
}
@@ -392,7 +352,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
+ UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
return GL_ALWAYS;
}
@@ -423,7 +383,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
+ UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
return GL_KEEP;
}
@@ -434,7 +394,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
+ UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW;
}
@@ -447,7 +407,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
+ UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
return GL_BACK;
}
@@ -486,7 +446,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
+ UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
return GL_COPY;
}
@@ -503,5 +463,10 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
return GL_FILL;
}
+inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
+ // Enumeration order matches register order. We can convert it arithmetically.
+ return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
+}
+
} // namespace MaxwellToGL
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b2a179746..c40adb6e7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,7 +316,7 @@ public:
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
Core::Frontend::GraphicsContext& context)
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
- has_debug_tool{HasDebugTool()} {}
+ program_manager{device}, has_debug_tool{HasDebugTool()} {}
RendererOpenGL::~RendererOpenGL() = default;
@@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
- // Create program pipeline
- program_manager.Create();
+ pipeline.Create();
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
// Generate VBO handle for drawing
vertex_buffer.Create();
@@ -487,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
+
+ // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+
+ glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+ &vertex_buffer_address);
+ }
}
void RendererOpenGL::AddTelemetryFields() {
@@ -508,7 +518,7 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
program_manager, state_tracker);
}
@@ -620,10 +630,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.UseVertexShader(vertex_program.handle);
- program_manager.UseGeometryShader(0);
- program_manager.UseFragmentShader(fragment_program.handle);
- program_manager.BindGraphicsPipeline();
+ program_manager.BindHostPipeline(pipeline.handle);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -658,13 +665,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
offsetof(ScreenRectVertex, tex_coord));
glVertexAttribBinding(PositionLocation, 0);
glVertexAttribBinding(TexCoordLocation, 0);
- glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
+ glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
+ sizeof(vertices));
+ } else {
+ glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+ }
glBindTextureUnit(0, screen_info.display_texture);
glBindSampler(0, 0);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+ program_manager.RestoreGuestPipeline();
}
bool RendererOpenGL::TryPresent(int timeout_ms) {
@@ -751,8 +766,9 @@ void RendererOpenGL::RenderScreenshot() {
}
bool RendererOpenGL::Init() {
- if (GLAD_GL_KHR_debug) {
+ if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
+ glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(DebugHandler, nullptr);
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 50b647661..8b18d32e6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -95,6 +96,7 @@ private:
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
Core::Frontend::GraphicsContext& context;
+ const Device device;
StateTracker state_tracker{system};
@@ -102,13 +104,17 @@ private:
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
+ OGLPipeline pipeline;
OGLFramebuffer screenshot_framebuffer;
+ // GPU address of the vertex buffer
+ GLuint64EXT vertex_buffer_address = 0;
+
/// Display information for Switch screen
ScreenInfo screen_info;
/// Global dummy shader pipeline
- GLShader::ProgramManager program_manager;
+ ProgramManager program_manager;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 648b1e71b..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <cstring>
#include <tuple>
@@ -70,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
u32 packed_front_face = PackFrontFace(regs.front_face);
- if (regs.screen_y_control.triangle_rast_flip != 0 &&
- regs.viewport_transform[0].scale_y > 0.0f) {
+ if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face
packed_front_face = 1 - packed_front_face;
}
@@ -92,6 +92,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
+ rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
}
@@ -101,6 +102,12 @@ void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
}
}
+void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept {
+ const auto& transform = regs.viewport_transform;
+ std::transform(transform.begin(), transform.end(), swizzles.begin(),
+ [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
+}
+
void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
@@ -144,6 +151,7 @@ void FixedPipelineState::Fill(const Maxwell& regs) {
rasterizer.Fill(regs);
depth_stencil.Fill(regs);
color_blending.Fill(regs);
+ viewport_swizzles.Fill(regs);
}
std::size_t FixedPipelineState::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 8652067a7..31a6398f2 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -164,6 +164,7 @@ struct FixedPipelineState {
BitField<23, 1, u32> tessellation_clockwise;
BitField<24, 1, u32> logic_op_enable;
BitField<25, 4, u32> logic_op;
+ BitField<29, 1, u32> rasterize_enable;
};
// TODO(Rodrigo): Move this to push constants
@@ -233,10 +234,17 @@ struct FixedPipelineState {
void Fill(const Maxwell& regs) noexcept;
};
+ struct ViewportSwizzles {
+ std::array<u16, Maxwell::NumViewports> swizzles;
+
+ void Fill(const Maxwell& regs) noexcept;
+ };
+
VertexInput vertex_input;
Rasterizer rasterizer;
DepthStencil depth_stencil;
ColorBlending color_blending;
+ ViewportSwizzles viewport_swizzles;
void Fill(const Maxwell& regs);
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 8681b821f..d7f1ae89f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
VkFilter Filter(Tegra::Texture::TextureFilter filter) {
switch (filter) {
- case Tegra::Texture::TextureFilter::Linear:
- return VK_FILTER_LINEAR;
case Tegra::Texture::TextureFilter::Nearest:
return VK_FILTER_NEAREST;
+ case Tegra::Texture::TextureFilter::Linear:
+ return VK_FILTER_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
+ UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
return {};
}
VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
switch (mipmap_filter) {
case Tegra::Texture::TextureMipmapFilter::None:
- // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
- // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
- // use an image view with a single mipmap level to emulate this.
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
- ;
- case Tegra::Texture::TextureMipmapFilter::Linear:
- return VK_SAMPLER_MIPMAP_MODE_LINEAR;
+ // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
+ // of GL_LINEAR or GL_NEAREST, but they can be emulated using
+ // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
+ // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
+ return VK_SAMPLER_MIPMAP_MODE_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Nearest:
return VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
- UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
+ UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
return {};
}
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
- default:
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -142,14 +141,14 @@ struct FormatTuple {
{VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
{VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
{VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
- {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8
+ {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
{VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
{VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
{VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
{VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
{VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
{VK_FORMAT_UNDEFINED}, // R16S
- {VK_FORMAT_UNDEFINED}, // R16UI
+ {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
{VK_FORMAT_UNDEFINED}, // R16I
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
@@ -160,6 +159,7 @@ struct FormatTuple {
{VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB
{VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U
{VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S
+ {VK_FORMAT_R8G8_UINT, Attachable | Storage}, // RG8UI
{VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI
{VK_FORMAT_UNDEFINED}, // RGBX16F
{VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI
@@ -167,7 +167,7 @@ struct FormatTuple {
{VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
{VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
{VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
- {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB
+ {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
{VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
{VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
{VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
@@ -287,14 +287,35 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
- default:
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
switch (type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ }
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -315,50 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
- default:
- break;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UNORM;
+ return VK_FORMAT_R8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UNORM;
+ return VK_FORMAT_R8G8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UNORM;
+ return VK_FORMAT_R8G8B8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UNORM;
+ return VK_FORMAT_R8G8B8A8_USCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UNORM;
+ return VK_FORMAT_R16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UNORM;
+ return VK_FORMAT_R16G16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UNORM;
+ return VK_FORMAT_R16G16B16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UNORM;
+ return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- default:
- break;
+ return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedInt:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SINT;
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SINT;
+ return VK_FORMAT_R8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SINT;
+ return VK_FORMAT_R8G8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SINT;
+ return VK_FORMAT_R8G8B8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SINT;
- default:
- break;
+ return VK_FORMAT_R8G8B8A8_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -387,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_UINT;
- default:
- break;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_USCALED;
+ return VK_FORMAT_R8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_USCALED;
+ return VK_FORMAT_R8G8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_USCALED;
+ return VK_FORMAT_R8G8B8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_USCALED;
+ return VK_FORMAT_R8G8B8A8_SINT;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_USCALED;
+ return VK_FORMAT_R16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_USCALED;
+ return VK_FORMAT_R16G16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_USCALED;
+ return VK_FORMAT_R16G16B16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_USCALED;
- default:
- break;
+ return VK_FORMAT_R16G16B16A16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SSCALED;
+ return VK_FORMAT_R16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SSCALED;
+ return VK_FORMAT_R16G16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SSCALED;
+ return VK_FORMAT_R16G16B16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SSCALED;
- default:
- break;
- }
- break;
- case Maxwell::VertexAttribute::Type::Float:
- switch (size) {
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -445,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- default:
- break;
}
break;
}
@@ -672,4 +677,27 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
return {};
}
+VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
+ switch (swizzle) {
+ case Maxwell::ViewportSwizzle::PositiveX:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV;
+ case Maxwell::ViewportSwizzle::NegativeX:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_X_NV;
+ case Maxwell::ViewportSwizzle::PositiveY:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Y_NV;
+ case Maxwell::ViewportSwizzle::NegativeY:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Y_NV;
+ case Maxwell::ViewportSwizzle::PositiveZ:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Z_NV;
+ case Maxwell::ViewportSwizzle::NegativeZ:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Z_NV;
+ case Maxwell::ViewportSwizzle::PositiveW:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV;
+ case Maxwell::ViewportSwizzle::NegativeW:
+ return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
+ }
+ UNREACHABLE_MSG("Invalid swizzle={}", static_cast<int>(swizzle));
+ return {};
+}
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 81bce4c6c..7e213452f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -59,4 +59,6 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face);
VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
+VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
+
} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..2d9b18ed9 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
#include <fmt/format.h>
#include "common/dynamic_library.h"
+#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/telemetry.h"
#include "core/core.h"
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
char* libvulkan_env = getenv("LIBVULKAN_PATH");
if (!libvulkan_env || !library.Open(libvulkan_env)) {
// Use the libvulkan.dylib from the application bundle.
- std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+ const std::string filename =
+ FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
library.Open(filename.c_str());
}
#else
@@ -153,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
}
}
- static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"};
- vk::Span<const char*> layers = layers_data;
- if (!enable_layers) {
- layers = {};
+ std::vector<const char*> layers;
+ layers.reserve(1);
+ if (enable_layers) {
+ layers.push_back("VK_LAYER_KHRONOS_validation");
+ }
+
+ const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+ if (!layer_properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+ layers.clear();
+ }
+
+ for (auto layer_it = layers.begin(); layer_it != layers.end();) {
+ const char* const layer = *layer_it;
+ const auto it = std::find_if(
+ layer_properties->begin(), layer_properties->end(),
+ [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
+ if (it == layer_properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+ layer_it = layers.erase(layer_it);
+ } else {
+ ++layer_it;
+ }
}
+
vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
if (!instance) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5b494da8c..f10f96cd8 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,6 +7,7 @@
#include <memory>
#include "core/core.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -36,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
} // Anonymous namespace
-CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size)
- : VideoCommon::BufferBlock{cpu_addr, size} {
+Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
+ : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
VkBufferCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
ci.pNext = nullptr;
@@ -53,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
buffer.commit = memory_manager.Commit(buffer.handle, false);
}
-CachedBufferBlock::~CachedBufferBlock() = default;
+Buffer::~Buffer() = default;
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
-
-VKBufferCache::~VKBufferCache() = default;
-
-Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
- return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
-}
-
-VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
- return buffer->GetHandle();
-}
-
-VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
- size = std::max(size, std::size_t(4));
- const auto& empty = staging_pool.GetUnusedBuffer(size, false);
- scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
- cmdbuf.FillBuffer(buffer, 0, size, 0);
- });
- return *empty.handle;
-}
-
-void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
+void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
- cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
+ cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -101,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
+ barrier.buffer = handle;
barrier.offset = offset;
barrier.size = size;
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -109,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
});
}
-void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) {
+void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
- size](vk::CommandBuffer cmdbuf) {
+
+ const VkBuffer handle = Handle();
+ scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
@@ -122,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- barrier.buffer = buffer;
+ barrier.buffer = handle;
barrier.offset = offset;
barrier.size = size;
@@ -130,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
- cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
+ cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
});
scheduler.Finish();
std::memcpy(data, staging.commit->Map(size), size);
}
-void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) {
+void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) const {
scheduler.RequestOutsideRenderPassOperationContext();
- scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
- dst_offset, size](vk::CommandBuffer cmdbuf) {
+
+ const VkBuffer dst_buffer = Handle();
+ scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
+ size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -168,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
});
}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
+ : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
+ CreateStreamBuffer(device,
+ scheduler)},
+ device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
+ staging_pool} {}
+
+VKBufferCache::~VKBufferCache() = default;
+
+std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+ return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
+ size);
+}
+
+VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
+ size = std::max(size, std::size_t(4));
+ const auto& empty = staging_pool.GetUnusedBuffer(size, false);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+ cmdbuf.FillBuffer(buffer, 0, size, 0);
+ });
+ return {*empty.handle, 0, 0};
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..3630aca77 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
#include "common/common_types.h"
#include "video_core/buffer_cache/buffer_cache.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -24,22 +23,34 @@ class VKDevice;
class VKMemoryManager;
class VKScheduler;
-class CachedBufferBlock final : public VideoCommon::BufferBlock {
+class Buffer final : public VideoCommon::BufferBlock {
public:
- explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
- VAddr cpu_addr, std::size_t size);
- ~CachedBufferBlock();
+ explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
+ VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
+ ~Buffer();
- VkBuffer GetHandle() const {
+ void Upload(std::size_t offset, std::size_t size, const u8* data) const;
+
+ void Download(std::size_t offset, std::size_t size, u8* data) const;
+
+ void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
+ std::size_t size) const;
+
+ VkBuffer Handle() const {
return *buffer.handle;
}
+ u64 Address() const {
+ return 0;
+ }
+
private:
+ VKScheduler& scheduler;
+ VKStagingBufferPool& staging_pool;
+
VKBuffer buffer;
};
-using Buffer = std::shared_ptr<CachedBufferBlock>;
-
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -47,21 +58,10 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
- VkBuffer GetEmptyBuffer(std::size_t size) override;
+ BufferInfo GetEmptyBuffer(std::size_t size) override;
protected:
- VkBuffer ToHandle(const Buffer& buffer) override;
-
- Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
-
- void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
-
- void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
-
- void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
private:
const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 8e1b46277..281bf9ac3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
};
add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
- add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
+ add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
VkDescriptorSetLayoutCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 890fd52cf..9259b618d 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
VkDescriptorPoolCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 0e4bbca97..9fd8ac3f6 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,74 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
- static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
- VK_FORMAT_A8B8G8R8_UINT_PACK32,
- VK_FORMAT_A8B8G8R8_SNORM_PACK32,
- VK_FORMAT_A8B8G8R8_SRGB_PACK32,
- VK_FORMAT_B5G6R5_UNORM_PACK16,
- VK_FORMAT_A2B10G10R10_UNORM_PACK32,
- VK_FORMAT_A1R5G5B5_UNORM_PACK16,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R32G32B32A32_UINT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R32G32_UINT,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16_UNORM,
- VK_FORMAT_R16G16_SNORM,
- VK_FORMAT_R16G16_SFLOAT,
- VK_FORMAT_R16_UNORM,
- VK_FORMAT_R8G8B8A8_SRGB,
- VK_FORMAT_R8G8_UNORM,
- VK_FORMAT_R8G8_SNORM,
- VK_FORMAT_R8_UNORM,
- VK_FORMAT_R8_UINT,
- VK_FORMAT_B10G11R11_UFLOAT_PACK32,
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32_UINT,
- VK_FORMAT_R32_SINT,
- VK_FORMAT_R16_SFLOAT,
- VK_FORMAT_R16G16B16A16_SFLOAT,
- VK_FORMAT_B8G8R8A8_UNORM,
- VK_FORMAT_R4G4B4A4_UNORM_PACK16,
- VK_FORMAT_D32_SFLOAT,
- VK_FORMAT_D16_UNORM,
- VK_FORMAT_D16_UNORM_S8_UINT,
- VK_FORMAT_D24_UNORM_S8_UINT,
- VK_FORMAT_D32_SFLOAT_S8_UINT,
- VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
- VK_FORMAT_BC2_UNORM_BLOCK,
- VK_FORMAT_BC3_UNORM_BLOCK,
- VK_FORMAT_BC4_UNORM_BLOCK,
- VK_FORMAT_BC5_UNORM_BLOCK,
- VK_FORMAT_BC5_SNORM_BLOCK,
- VK_FORMAT_BC7_UNORM_BLOCK,
- VK_FORMAT_BC6H_UFLOAT_BLOCK,
- VK_FORMAT_BC6H_SFLOAT_BLOCK,
- VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
- VK_FORMAT_BC2_SRGB_BLOCK,
- VK_FORMAT_BC3_SRGB_BLOCK,
- VK_FORMAT_BC7_SRGB_BLOCK,
- VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
- VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
- VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
- VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
- VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
- VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
- VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
- VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
- VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
- VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
+ static constexpr std::array formats{
+ VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_UINT_PACK32,
+ VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+ VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+ VK_FORMAT_B5G6R5_UNORM_PACK16,
+ VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R32G32B32A32_UINT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R32G32_UINT,
+ VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_UNORM,
+ VK_FORMAT_R16G16_UNORM,
+ VK_FORMAT_R16G16_SNORM,
+ VK_FORMAT_R16G16_SFLOAT,
+ VK_FORMAT_R16_UNORM,
+ VK_FORMAT_R16_UINT,
+ VK_FORMAT_R8G8B8A8_SRGB,
+ VK_FORMAT_R8G8_UNORM,
+ VK_FORMAT_R8G8_SNORM,
+ VK_FORMAT_R8G8_UINT,
+ VK_FORMAT_R8_UNORM,
+ VK_FORMAT_R8_UINT,
+ VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32_UINT,
+ VK_FORMAT_R32_SINT,
+ VK_FORMAT_R16_SFLOAT,
+ VK_FORMAT_R16G16B16A16_SFLOAT,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_FORMAT_B8G8R8A8_SRGB,
+ VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+ VK_FORMAT_D32_SFLOAT,
+ VK_FORMAT_D16_UNORM,
+ VK_FORMAT_D16_UNORM_S8_UINT,
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+ VK_FORMAT_BC2_UNORM_BLOCK,
+ VK_FORMAT_BC3_UNORM_BLOCK,
+ VK_FORMAT_BC4_UNORM_BLOCK,
+ VK_FORMAT_BC5_UNORM_BLOCK,
+ VK_FORMAT_BC5_SNORM_BLOCK,
+ VK_FORMAT_BC7_UNORM_BLOCK,
+ VK_FORMAT_BC6H_UFLOAT_BLOCK,
+ VK_FORMAT_BC6H_SFLOAT_BLOCK,
+ VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+ VK_FORMAT_BC2_SRGB_BLOCK,
+ VK_FORMAT_BC3_SRGB_BLOCK,
+ VK_FORMAT_BC7_SRGB_BLOCK,
+ VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+ VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+ VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+ VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+ VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+ VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+ VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+ VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+ VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+ VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+ };
std::unordered_map<VkFormat, VkFormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.GetFormatProperties(format));
@@ -260,6 +265,10 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
}
+ if (!nv_viewport_swizzle) {
+ LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
+ }
+
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
if (khr_uniform_buffer_standard_layout) {
std430_layout.sType =
@@ -293,6 +302,17 @@ bool VKDevice::Create() {
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
}
+ VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border;
+ if (ext_custom_border_color) {
+ custom_border.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
+ custom_border.pNext = nullptr;
+ custom_border.customBorderColors = VK_TRUE;
+ custom_border.customBorderColorWithoutFormat = VK_TRUE;
+ SetNext(next, custom_border);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");
+ }
+
if (!ext_depth_range_unrestricted) {
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
}
@@ -520,7 +540,9 @@ std::vector<const char*> VKDevice::LoadExtensions() {
bool has_khr_shader_float16_int8{};
bool has_ext_subgroup_size_control{};
bool has_ext_transform_feedback{};
+ bool has_ext_custom_border_color{};
for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
+ Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
Test(extension, khr_uniform_buffer_standard_layout,
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -534,6 +556,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {
false);
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
false);
+ Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,
+ false);
if (Settings::values.renderer_debug) {
Test(extension, nv_device_diagnostics_config,
VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true);
@@ -606,6 +630,19 @@ std::vector<const char*> VKDevice::LoadExtensions() {
}
}
+ if (has_ext_custom_border_color) {
+ VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features;
+ border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT;
+ border_features.pNext = nullptr;
+ features.pNext = &border_features;
+ physical.GetFeatures2KHR(features);
+
+ if (border_features.customBorderColors && border_features.customBorderColorWithoutFormat) {
+ extensions.push_back(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+ ext_custom_border_color = true;
+ }
+ }
+
return extensions;
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index c8640762d..6b9227b09 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -147,6 +147,11 @@ public:
return is_formatless_image_load_supported;
}
+ /// Returns true if the device supports VK_NV_viewport_swizzle.
+ bool IsNvViewportSwizzleSupported() const {
+ return nv_viewport_swizzle;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
bool IsKhrUniformBufferStandardLayoutSupported() const {
return khr_uniform_buffer_standard_layout;
@@ -172,6 +177,11 @@ public:
return ext_transform_feedback;
}
+ /// Returns true if the device supports VK_EXT_custom_border_color.
+ bool IsExtCustomBorderColorSupported() const {
+ return ext_custom_border_color;
+ }
+
/// Returns the vendor name reported from Vulkan.
std::string_view GetVendorName() const {
return vendor_name;
@@ -222,11 +232,13 @@ private:
bool is_float16_supported{}; ///< Support for float16 arithmetics.
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+ bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
+ bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
// Telemetry parameters
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 04d07fe6a..043fe7947 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,6 +7,7 @@
#include <memory>
#include "video_core/fence_manager.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 1ac981974..69b6bba00 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <array>
#include <cstring>
#include <vector>
@@ -50,6 +51,23 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
topology) == std::end(unsupported_topologies);
}
+VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
+ union {
+ u32 raw;
+ BitField<0, 3, Maxwell::ViewportSwizzle> x;
+ BitField<4, 3, Maxwell::ViewportSwizzle> y;
+ BitField<8, 3, Maxwell::ViewportSwizzle> z;
+ BitField<12, 3, Maxwell::ViewportSwizzle> w;
+ } const unpacked{swizzle};
+
+ VkViewportSwizzleNV result;
+ result.x = MaxwellToVK::ViewportSwizzle(unpacked.x);
+ result.y = MaxwellToVK::ViewportSwizzle(unpacked.y);
+ result.z = MaxwellToVK::ViewportSwizzle(unpacked.z);
+ result.w = MaxwellToVK::ViewportSwizzle(unpacked.w);
+ return result;
+}
+
} // Anonymous namespace
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
@@ -162,6 +180,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
const auto& ds = fixed_state.depth_stencil;
const auto& cd = fixed_state.color_blending;
const auto& rs = fixed_state.rasterizer;
+ const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles;
std::vector<VkVertexInputBindingDescription> vertex_bindings;
std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
@@ -244,12 +263,25 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
viewport_ci.scissorCount = Maxwell::NumViewports;
viewport_ci.pScissors = nullptr;
+ std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
+ std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
+ UnpackViewportSwizzle);
+ VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci;
+ swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV;
+ swizzle_ci.pNext = nullptr;
+ swizzle_ci.flags = 0;
+ swizzle_ci.viewportCount = Maxwell::NumViewports;
+ swizzle_ci.pViewportSwizzles = swizzles.data();
+ if (device.IsNvViewportSwizzleSupported()) {
+ viewport_ci.pNext = &swizzle_ci;
+ }
+
VkPipelineRasterizationStateCreateInfo rasterization_ci;
rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_ci.pNext = nullptr;
rasterization_ci.flags = 0;
rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE;
- rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
+ rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;
rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
rasterization_ci.cullMode =
rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index fe45ed269..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
u32 binding = base_binding;
AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
- AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
+ AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
+ AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
return binding;
}
@@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
- GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
- u32 main_offset)
- : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
+Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
+ : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {}
-CachedShader::~CachedShader() = default;
+Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
- Core::System& system, Tegra::Engines::ShaderType stage) {
- if (stage == Tegra::Engines::ShaderType::Compute) {
+Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
+ Tegra::Engines::ShaderType stage) {
+ if (stage == ShaderType::Compute) {
return system.GPU().KeplerCompute();
} else {
return system.GPU().Maxwell3D();
@@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache)
- : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
- descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
- renderpass_cache{renderpass_cache} {}
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
+ scheduler{scheduler}, descriptor_pool{descriptor_pool},
+ update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
VKPipelineCache::~VKPipelineCache() = default;
-std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const auto& gpu = system.GPU().Maxwell3D();
- std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
const GPUVAddr program_addr{GetShaderAddress(system, program)};
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
- if (!shader) {
+
+ Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
+ if (!result) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
- const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+ const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
+ stage_offset);
+ result = shader.get();
- shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
- std::move(code), stage_offset);
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader), *cpu_addr, size_in_bytes);
} else {
- null_shader = shader;
+ null_shader = std::move(shader);
}
}
- shaders[index] = std::move(shader);
+ shaders[index] = result;
}
return last_shaders = shaders;
}
@@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
- auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
+ Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
const auto host_ptr = memory_manager.GetPointer(program_addr);
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
- shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
- program_addr, *cpu_addr, std::move(code),
- KERNEL_MAIN_OFFSET);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
+ std::move(code), KERNEL_MAIN_OFFSET);
+ shader = shader_info.get();
+
if (cpu_addr) {
- Register(shader);
+ Register(std::move(shader_info), *cpu_addr, size_in_bytes);
} else {
- null_kernel = shader;
+ null_kernel = std::move(shader_info);
}
}
@@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry;
}
-void VKPipelineCache::Unregister(const Shader& shader) {
+void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false;
const auto Finish = [&] {
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
Finish();
it = compute_cache.erase(it);
}
-
- RasterizerCache::Unregister(shader);
}
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
ASSERT(point_size != 0.0f);
}
for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
- specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();
+ const auto& attribute = fixed_state.vertex_input.attributes[i];
+ specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
+ specialization.attribute_types[i] = attribute.Type();
}
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
@@ -328,13 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
}
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
- const auto shader = TryGet(*cpu_addr);
- ASSERT(shader);
+ const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
- const auto program_type = GetShaderType(program_enum);
+ const ShaderType program_type = GetShaderType(program_enum);
const auto& entries = shader->GetEntries();
program[stage] = {
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
@@ -376,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
return;
}
- if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
- // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
- // crash.
+ if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
+ descriptor_type == STORAGE_TEXEL_BUFFER) {
+ // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
+ // Note: Fixed in driver Windows 443.24, Linux 440.66.15
for (u32 i = 0; i < count; ++i) {
VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
entry.dstBinding = binding + i;
entry.dstArrayElement = 0;
entry.descriptorCount = 1;
entry.descriptorType = descriptor_type;
- entry.offset = offset + i * entry_size;
+ entry.offset = static_cast<std::size_t>(offset + i * entry_size);
entry.stride = entry_size;
}
} else if (count > 0) {
@@ -406,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries(
std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
- AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
+ AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
+ AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
}
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_engine_interface.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
#include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader_cache.h"
namespace Core {
class System;
@@ -41,8 +41,6 @@ class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
-class CachedShader;
-using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
namespace Vulkan {
-class CachedShader final : public RasterizerCacheObject {
+class Shader {
public:
- explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
- u32 main_offset);
- ~CachedShader();
+ explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
+ ~Shader();
GPUVAddr GetGpuAddr() const {
return gpu_addr;
}
- std::size_t GetSizeInBytes() const override {
- return program_code.size() * sizeof(u64);
- }
-
VideoCommon::Shader::ShaderIR& GetIR() {
return shader_ir;
}
@@ -144,25 +137,23 @@ private:
ShaderEntries entries;
};
-class VKPipelineCache final : public RasterizerCache<Shader> {
+class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
const VKDevice& device, VKScheduler& scheduler,
VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
- ~VKPipelineCache();
+ ~VKPipelineCache() override;
- std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+ std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
protected:
- void Unregister(const Shader& shader) override;
-
- void FlushObjectInner(const Shader& object) override {}
+ void OnShaderRemoval(Shader* shader) final;
private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
VKUpdateDescriptorQueue& update_descriptor_queue;
VKRenderPassCache& renderpass_cache;
- Shader null_shader{};
- Shader null_kernel{};
+ std::unique_ptr<Shader> null_shader;
+ std::unique_ptr<Shader> null_kernel;
- std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+ std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 8b009fc22..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader_cache.h"
namespace Vulkan {
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
}
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
for (std::size_t i = 0; i < std::size(addresses); ++i) {
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
+ if constexpr (std::is_same_v<Entry, SamplerEntry>) {
+ if (entry.is_separated) {
+ const u32 buffer_1 = entry.buffer;
+ const u32 buffer_2 = entry.secondary_buffer;
+ const u32 offset_1 = entry.offset;
+ const u32 offset_2 = entry.secondary_offset;
+ const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
+ const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
+ return engine.GetTextureInfo(handle_1 | handle_2);
+ }
+ }
if (entry.is_bindless) {
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(tex_handle);
@@ -131,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
}
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
+ if (!is_clear) {
+ return true;
+ }
+ // First we have to make sure all clear masks are enabled.
+ if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
+ !regs.clear_buffers.A) {
+ return true;
+ }
+ // If scissors are disabled, the whole screen is cleared
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Then we have to confirm scissor testing clears the whole image
+ const std::size_t index = regs.clear_buffers.RT;
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
+ scissor.max_y < regs.rt[index].height;
+}
+
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
+ // If we are not clearing, the contents have to be preserved
+ if (!is_clear) {
+ return true;
+ }
+ // For depth stencil clears we only have to confirm scissor test covers the whole image
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Make sure the clear cover the whole image
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
+ scissor.max_y < regs.zeta_height;
+}
+
} // Anonymous namespace
class BufferBindings final {
@@ -332,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Unmap();
- const Texceptions texceptions = UpdateAttachments();
+ const Texceptions texceptions = UpdateAttachments(false);
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
key.renderpass_params = GetRenderPassParams(texceptions);
@@ -388,7 +443,7 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
@@ -468,8 +523,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
const auto& entries = pipeline.GetEntries();
SetupComputeConstBuffers(entries);
SetupComputeGlobalBuffers(entries);
- SetupComputeTexelBuffers(entries);
+ SetupComputeUniformTexels(entries);
SetupComputeTextures(entries);
+ SetupComputeStorageTexels(entries);
SetupComputeImages(entries);
buffer_cache.Unmap();
@@ -532,14 +588,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
return;
}
texture_cache.OnCPUWrite(addr, size);
- pipeline_cache.InvalidateRegion(addr, size);
+ pipeline_cache.OnCPUWrite(addr, size);
buffer_cache.OnCPUWrite(addr, size);
- query_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::SyncGuestHost() {
texture_cache.SyncGuestHost();
buffer_cache.SyncGuestHost();
+ pipeline_cache.SyncGuestHost();
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -569,7 +625,9 @@ void RasterizerVulkan::ReleaseFences() {
}
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- FlushRegion(addr, size);
+ if (Settings::IsGPULevelExtreme()) {
+ FlushRegion(addr, size);
+ }
InvalidateRegion(addr, size);
}
@@ -662,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ auto& dirty = maxwell3d.dirty.flags;
+ auto& regs = maxwell3d.regs;
+
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -673,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -681,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
+ zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -713,7 +776,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
if (!view) {
return false;
}
- key.views.push_back(view->GetHandle());
+ key.views.push_back(view->GetAttachment());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
key.layers = std::min(key.layers, view->GetNumLayers());
@@ -773,20 +836,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
}
void RasterizerVulkan::SetupShaderDescriptors(
- const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
+ const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
texture_cache.GuardSamplers(true);
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
// Skip VertexA stage
- const auto& shader = shaders[stage + 1];
+ Shader* const shader = shaders[stage + 1];
if (!shader) {
continue;
}
const auto& entries = shader->GetEntries();
SetupGraphicsConstBuffers(entries, stage);
SetupGraphicsGlobalBuffers(entries, stage);
- SetupGraphicsTexelBuffers(entries, stage);
+ SetupGraphicsUniformTexels(entries, stage);
SetupGraphicsTextures(entries, stage);
+ SetupGraphicsStorageTexels(entries, stage);
SetupGraphicsImages(entries, stage);
}
texture_cache.GuardSamplers(false);
@@ -836,6 +900,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
+ return;
+ }
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
@@ -850,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
const GPUVAddr gpu_addr = binding.Address();
- const std::size_t size = binding.buffer_size;
- const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
+ const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
- scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
@@ -864,6 +932,9 @@ void RasterizerVulkan::EndTransformFeedback() {
if (regs.tfb_enabled == 0) {
return;
}
+ if (!device.IsExtTransformFeedbackSupported()) {
+ return;
+ }
scheduler.Record(
[](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
@@ -875,14 +946,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
const auto& attrib = regs.vertex_attrib_format[index];
- if (!attrib.IsValid()) {
+ if (attrib.IsConstant()) {
vertex_input.SetAttribute(index, false, 0, 0, {}, {});
continue;
}
-
- [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
- ASSERT(buffer.IsEnabled());
-
vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
attrib.size.Value());
}
@@ -906,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
continue;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
- buffer_bindings.AddVertexBinding(buffer, offset);
+ const auto info = buffer_cache.UploadMemory(start, size);
+ buffer_bindings.AddVertexBinding(info.handle, info.offset);
}
}
@@ -929,7 +996,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
std::tie(buffer, offset) = quad_indexed_pass.Assemble(
regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
@@ -943,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
break;
}
const GPUVAddr gpu_addr = regs.index_array.IndexStart();
- auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
+ VkBuffer buffer = info.handle;
+ u64 offset = info.offset;
auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -978,12 +1049,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
}
}
-void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
+void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D();
- for (const auto& entry : entries.texel_buffers) {
+ for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, stage).tic;
- SetupTexelBuffer(image, entry);
+ SetupUniformTexels(image, entry);
}
}
@@ -998,6 +1069,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
}
}
+void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().Maxwell3D();
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().Maxwell3D();
@@ -1030,12 +1110,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
}
}
-void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
+void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute();
- for (const auto& entry : entries.texel_buffers) {
+ for (const auto& entry : entries.uniform_texels) {
const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
- SetupTexelBuffer(image, entry);
+ SetupUniformTexels(image, entry);
}
}
@@ -1050,6 +1130,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
}
}
+void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
+ MICROPROFILE_SCOPE(Vulkan_Textures);
+ const auto& gpu = system.GPU().KeplerCompute();
+ for (const auto& entry : entries.storage_texels) {
+ const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ SetupStorageTexel(image, entry);
+ }
+}
+
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
const auto& gpu = system.GPU().KeplerCompute();
@@ -1072,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
ASSERT(size <= MaxConstbufferSize);
- const auto [buffer_handle, offset] =
+ const auto info =
buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
-
- update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1089,18 +1177,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
// Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
// default buffer.
static constexpr std::size_t dummy_size = 4;
- const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
- update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
+ const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
return;
}
- const auto [buffer, offset] = buffer_cache.UploadMemory(
+ const auto info = buffer_cache.UploadMemory(
actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
- update_descriptor_queue.AddBuffer(buffer, offset, size);
+ update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
}
-void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
- const TexelBufferEntry& entry) {
+void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
+ const UniformTexelEntry& entry) {
const auto view = texture_cache.GetTextureSurface(tic, entry);
ASSERT(view->IsBufferView());
@@ -1112,16 +1200,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
auto view = texture_cache.GetTextureSurface(texture.tic, entry);
ASSERT(!view->IsBufferView());
- const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
- texture.tic.z_source, texture.tic.w_source);
+ const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
+ texture.tic.z_source, texture.tic.w_source);
const auto sampler = sampler_cache.GetSampler(texture.tsc);
update_descriptor_queue.AddSampledImage(sampler, image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
sampled_views.push_back(ImageView{std::move(view), image_layout});
}
+void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
+ const StorageTexelEntry& entry) {
+ const auto view = texture_cache.GetImageSurface(tic, entry);
+ ASSERT(view->IsBufferView());
+
+ update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
+}
+
void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
auto view = texture_cache.GetImageSurface(tic, entry);
@@ -1131,10 +1227,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
UNIMPLEMENTED_IF(tic.IsBuffer());
- const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ const VkImageView image_view =
+ view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
update_descriptor_queue.AddImage(image_view);
- const auto image_layout = update_descriptor_queue.GetLastImageLayout();
+ VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
*image_layout = VK_IMAGE_LAYOUT_GENERAL;
image_views.push_back(ImageView{std::move(view), image_layout});
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0ed0e48c6..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
void FlushWork();
- Texceptions UpdateAttachments();
+ /// @brief Updates the currently bound attachments
+ /// @param is_clear True when the framebuffer is updated as a clear
+ /// @return Bitfield of attachments being used as sampled textures
+ Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
@@ -168,7 +171,7 @@ private:
bool is_indexed, bool is_instanced);
/// Setup descriptors in the graphics pipeline.
- void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+ void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
void SetupImageTransitions(Texceptions texceptions,
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
@@ -193,12 +196,15 @@ private:
/// Setup global buffers in the graphics pipeline.
void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
- /// Setup texel buffers in the graphics pipeline.
- void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+ /// Setup uniform texels in the graphics pipeline.
+ void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
/// Setup textures in the graphics pipeline.
void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+ /// Setup storage texels in the graphics pipeline.
+ void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
+
/// Setup images in the graphics pipeline.
void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
@@ -209,11 +215,14 @@ private:
void SetupComputeGlobalBuffers(const ShaderEntries& entries);
/// Setup texel buffers in the compute pipeline.
- void SetupComputeTexelBuffers(const ShaderEntries& entries);
+ void SetupComputeUniformTexels(const ShaderEntries& entries);
/// Setup textures in the compute pipeline.
void SetupComputeTextures(const ShaderEntries& entries);
+ /// Setup storage texels in the compute pipeline.
+ void SetupComputeStorageTexels(const ShaderEntries& entries);
+
/// Setup images in the compute pipeline.
void SetupComputeImages(const ShaderEntries& entries);
@@ -222,10 +231,12 @@ private:
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
- void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
+ void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
+ void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
+
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 2687d8d95..616eacc36 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
#include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/textures/texture.h"
+using Tegra::Texture::TextureMipmapFilter;
+
namespace Vulkan {
namespace {
@@ -39,9 +41,18 @@ VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
VKSamplerCache::~VKSamplerCache() = default;
vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+ const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
+ const std::array color = tsc.GetBorderColor();
+
+ VkSamplerCustomBorderColorCreateInfoEXT border;
+ border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT;
+ border.pNext = nullptr;
+ border.format = VK_FORMAT_UNDEFINED;
+ std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
+
VkSamplerCreateInfo ci;
ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
- ci.pNext = nullptr;
+ ci.pNext = arbitrary_borders ? &border : nullptr;
ci.flags = 0;
ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
@@ -54,9 +65,9 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
ci.maxAnisotropy = tsc.GetMaxAnisotropy();
ci.compareEnable = tsc.depth_compare_enabled;
ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
- ci.minLod = tsc.GetMinLod();
- ci.maxLod = tsc.GetMaxLod();
- ci.borderColor = ConvertBorderColor(tsc.GetBorderColor());
+ ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
+ ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
+ ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
ci.unnormalizedCoordinates = VK_FALSE;
return device.GetLogical().CreateSampler(ci);
}
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..56524e6f3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
#include <utility>
#include "common/microprofile.h"
+#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
}
void VKScheduler::WorkerThread() {
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 18678968c..97429cc59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -400,8 +400,9 @@ private:
u32 binding = specialization.base_binding;
binding = DeclareConstantBuffers(binding);
binding = DeclareGlobalBuffers(binding);
- binding = DeclareTexelBuffers(binding);
+ binding = DeclareUniformTexels(binding);
binding = DeclareSamplers(binding);
+ binding = DeclareStorageTexels(binding);
binding = DeclareImages(binding);
const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@@ -515,6 +516,16 @@ private:
void DeclareCommon() {
thread_id =
DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
+ thread_masks[0] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
+ thread_masks[1] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
+ thread_masks[2] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
+ thread_masks[3] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
+ thread_masks[4] =
+ DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
}
void DeclareVertex() {
@@ -731,8 +742,10 @@ private:
if (!IsGenericAttribute(index)) {
continue;
}
-
const u32 location = GetGenericAttributeLocation(index);
+ if (!IsAttributeEnabled(location)) {
+ continue;
+ }
const auto type_descriptor = GetAttributeType(location);
Id type;
if (IsInputAttributeArray()) {
@@ -877,7 +890,7 @@ private:
return binding;
}
- u32 DeclareTexelBuffers(u32 binding) {
+ u32 DeclareUniformTexels(u32 binding) {
for (const auto& sampler : ir.GetSamplers()) {
if (!sampler.is_buffer) {
continue;
@@ -898,7 +911,7 @@ private:
Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id});
+ uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
}
return binding;
}
@@ -933,31 +946,48 @@ private:
return binding;
}
- u32 DeclareImages(u32 binding) {
+ u32 DeclareStorageTexels(u32 binding) {
for (const auto& image : ir.GetImages()) {
- const auto [dim, arrayed] = GetImageDim(image);
- constexpr int depth = 0;
- constexpr bool ms = false;
- constexpr int sampled = 2; // This won't be accessed with a sampler
- constexpr auto format = spv::ImageFormat::Unknown;
- const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
- const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
- const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
- AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
-
- Decorate(id, spv::Decoration::Binding, binding++);
- Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
- if (image.is_read && !image.is_written) {
- Decorate(id, spv::Decoration::NonWritable);
- } else if (image.is_written && !image.is_read) {
- Decorate(id, spv::Decoration::NonReadable);
+ if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
}
+ DeclareImage(image, binding);
+ }
+ return binding;
+ }
- images.emplace(image.index, StorageImage{image_type, id});
+ u32 DeclareImages(u32 binding) {
+ for (const auto& image : ir.GetImages()) {
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ continue;
+ }
+ DeclareImage(image, binding);
}
return binding;
}
+ void DeclareImage(const Image& image, u32& binding) {
+ const auto [dim, arrayed] = GetImageDim(image);
+ constexpr int depth = 0;
+ constexpr bool ms = false;
+ constexpr int sampled = 2; // This won't be accessed with a sampler
+ const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
+ const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
+ const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
+ const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
+ AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
+
+ Decorate(id, spv::Decoration::Binding, binding++);
+ Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
+ if (image.is_read && !image.is_written) {
+ Decorate(id, spv::Decoration::NonWritable);
+ } else if (image.is_written && !image.is_read) {
+ Decorate(id, spv::Decoration::NonReadable);
+ }
+
+ images.emplace(image.index, StorageImage{image_type, id});
+ }
+
bool IsRenderTargetEnabled(u32 rt) const {
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@@ -976,6 +1006,10 @@ private:
return stage == ShaderType::TesselationControl;
}
+ bool IsAttributeEnabled(u32 location) const {
+ return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
+ }
+
u32 GetNumInputVertices() const {
switch (stage) {
case ShaderType::Geometry:
@@ -1071,8 +1105,7 @@ private:
void VisitBasicBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
- [[maybe_unused]] const Type type = Visit(node).type;
- ASSERT(type == Type::Void);
+ Visit(node);
}
}
@@ -1192,16 +1225,20 @@ private:
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
return {v_float_zero, Type::Float};
default:
- if (IsGenericAttribute(attribute)) {
- const u32 location = GetGenericAttributeLocation(attribute);
- const auto type_descriptor = GetAttributeType(location);
- const Type type = type_descriptor.type;
- const Id attribute_id = input_attributes.at(attribute);
- const std::vector elements = {element};
- const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
- return {OpLoad(GetTypeDefinition(type), pointer), type};
+ if (!IsGenericAttribute(attribute)) {
+ break;
}
- break;
+ const u32 location = GetGenericAttributeLocation(attribute);
+ if (!IsAttributeEnabled(location)) {
+ // Disabled attributes (also known as constant attributes) always return zero.
+ return {v_float_zero, Type::Float};
+ }
+ const auto type_descriptor = GetAttributeType(location);
+ const Type type = type_descriptor.type;
+ const Id attribute_id = input_attributes.at(attribute);
+ const std::vector elements = {element};
+ const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
+ return {OpLoad(GetTypeDefinition(type), pointer), type};
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
return {v_float_zero, Type::Float};
@@ -1237,7 +1274,7 @@ private:
} else {
UNREACHABLE_MSG("Unmanaged offset node type");
}
- pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index,
+ pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
buffer_element);
}
return {OpLoad(t_float, pointer), Type::Float};
@@ -1362,7 +1399,9 @@ private:
Expression target{};
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
+ // Writing to Register::ZeroIndex is a no op but we still have to visit its source
+ // because it might have side effects.
+ Visit(src);
return {};
}
target = {registers.at(gpr->GetIndex()), Type::Float};
@@ -1590,7 +1629,7 @@ private:
const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
const Id carry = OpCompositeExtract(t_uint, result, 1);
- return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool};
+ return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
}
Expression LogicalAssign(Operation operation) {
@@ -1618,6 +1657,24 @@ private:
return {};
}
+ Expression LogicalFOrdered(Operation operation) {
+ // Emulate SPIR-V's OpOrdered
+ const Id op_a = AsFloat(Visit(operation[0]));
+ const Id op_b = AsFloat(Visit(operation[1]));
+ const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
+ const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
+ return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
+ }
+
+ Expression LogicalFUnordered(Operation operation) {
+ // Emulate SPIR-V's OpUnordered
+ const Id op_a = AsFloat(Visit(operation[0]));
+ const Id op_b = AsFloat(Visit(operation[1]));
+ const Id is_nan_a = OpIsNan(t_bool, op_a);
+ const Id is_nan_b = OpIsNan(t_bool, op_b);
+ return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
+ }
+
Id GetTextureSampler(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
ASSERT(!meta.sampler.is_buffer);
@@ -1635,7 +1692,7 @@ private:
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
const u32 index = meta.sampler.index;
if (meta.sampler.is_buffer) {
- const auto& entry = texel_buffers.at(index);
+ const auto& entry = uniform_texels.at(index);
return OpLoad(entry.image_type, entry.image);
} else {
const auto& entry = sampled_images.at(index);
@@ -1912,39 +1969,20 @@ private:
return {};
}
- Expression AtomicImageAdd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMin(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageMax(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageAnd(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
-
- Expression AtomicImageOr(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ template <Id (Module::*func)(Id, Id, Id, Id, Id)>
+ Expression AtomicImage(Operation operation) {
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+ ASSERT(meta.values.size() == 1);
- Expression AtomicImageXor(Operation operation) {
- UNIMPLEMENTED();
- return {};
- }
+ const Id coordinate = GetCoordinates(operation, Type::Int);
+ const Id image = images.at(meta.image.index).image;
+ const Id sample = v_uint_zero;
+ const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
- Expression AtomicImageExchange(Operation operation) {
- UNIMPLEMENTED();
- return {};
+ const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
+ const Id semantics = v_uint_zero;
+ const Id value = AsUint(Visit(meta.values[0]));
+ return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@@ -1959,7 +1997,7 @@ private:
return {v_float_zero, Type::Float};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
- const Id semantics = Constant(t_uint, 0);
+ const Id semantics = v_uint_zero;
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@@ -2157,14 +2195,37 @@ private:
return {OpLoad(t_uint, thread_id), Type::Uint};
}
+ template <std::size_t index>
+ Expression ThreadMask(Operation) {
+ // TODO(Rodrigo): Handle devices with different warp sizes
+ const Id mask = thread_masks[index];
+ return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
+ }
+
Expression ShuffleIndexed(Operation operation) {
const Id value = AsFloat(Visit(operation[0]));
const Id index = AsUint(Visit(operation[1]));
return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
}
- Expression MemoryBarrierGL(Operation) {
- const auto scope = spv::Scope::Device;
+ Expression Barrier(Operation) {
+ if (!ir.IsDecompiled()) {
+ LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
+ return {};
+ }
+
+ const auto scope = spv::Scope::Workgroup;
+ const auto memory = spv::Scope::Workgroup;
+ const auto semantics =
+ spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
+ OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
+ Constant(t_uint, static_cast<u32>(memory)),
+ Constant(t_uint, static_cast<u32>(semantics)));
+ return {};
+ }
+
+ template <spv::Scope scope>
+ Expression MemoryBarrier(Operation) {
const auto semantics =
spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2511,7 +2572,14 @@ private:
&SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
- &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::LogicalFOrdered,
+ &SPIRVDecompiler::LogicalFUnordered,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
+ &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
&SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
&SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
@@ -2553,11 +2621,11 @@ private:
&SPIRVDecompiler::ImageLoad,
&SPIRVDecompiler::ImageStore,
- &SPIRVDecompiler::AtomicImageAdd,
- &SPIRVDecompiler::AtomicImageAnd,
- &SPIRVDecompiler::AtomicImageOr,
- &SPIRVDecompiler::AtomicImageXor,
- &SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
+ &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@@ -2614,9 +2682,16 @@ private:
&SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
&SPIRVDecompiler::ThreadId,
+ &SPIRVDecompiler::ThreadMask<0>, // Eq
+ &SPIRVDecompiler::ThreadMask<1>, // Ge
+ &SPIRVDecompiler::ThreadMask<2>, // Gt
+ &SPIRVDecompiler::ThreadMask<3>, // Le
+ &SPIRVDecompiler::ThreadMask<4>, // Lt
&SPIRVDecompiler::ShuffleIndexed,
- &SPIRVDecompiler::MemoryBarrierGL,
+ &SPIRVDecompiler::Barrier,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
+ &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2692,8 +2767,11 @@ private:
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
+ const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
+
const Id v_float_zero = Constant(t_float, 0.0f);
const Id v_float_one = Constant(t_float, 1.0f);
+ const Id v_uint_zero = Constant(t_uint, 0);
// Nvidia uses these defaults for varyings (e.g. position and generic attributes)
const Id v_varying_default =
@@ -2718,15 +2796,16 @@ private:
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
std::map<u32, Id> constant_buffers;
std::map<GlobalMemoryBase, Id> global_buffers;
- std::map<u32, TexelBuffer> texel_buffers;
+ std::map<u32, TexelBuffer> uniform_texels;
std::map<u32, SampledImage> sampled_images;
+ std::map<u32, TexelBuffer> storage_texels;
std::map<u32, StorageImage> images;
+ std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id instance_index{};
Id vertex_index{};
Id base_instance{};
Id base_vertex{};
- std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
Id frag_depth{};
Id frag_coord{};
Id front_facing{};
@@ -2738,6 +2817,7 @@ private:
Id workgroup_id{};
Id local_invocation_id{};
Id thread_id{};
+ std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
VertexIndices in_indices;
VertexIndices out_indices;
@@ -2981,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
}
for (const auto& sampler : ir.GetSamplers()) {
if (sampler.is_buffer) {
- entries.texel_buffers.emplace_back(sampler);
+ entries.uniform_texels.emplace_back(sampler);
} else {
entries.samplers.emplace_back(sampler);
}
}
for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
+ if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
+ entries.storage_texels.emplace_back(image);
+ } else {
+ entries.images.emplace_back(image);
+ }
}
for (const auto& attribute : ir.GetInputAttributes()) {
if (IsGenericAttribute(attribute)) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..2b0e90396 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,8 +21,9 @@ class VKDevice;
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using TexelBufferEntry = VideoCommon::Shader::Sampler;
+using UniformTexelEntry = VideoCommon::Shader::Sampler;
using SamplerEntry = VideoCommon::Shader::Sampler;
+using StorageTexelEntry = VideoCommon::Shader::Image;
using ImageEntry = VideoCommon::Shader::Image;
constexpr u32 DESCRIPTOR_SET = 0;
@@ -66,13 +67,15 @@ private:
struct ShaderEntries {
u32 NumBindings() const {
return static_cast<u32>(const_buffers.size() + global_buffers.size() +
- texel_buffers.size() + samplers.size() + images.size());
+ uniform_texels.size() + samplers.size() + storage_texels.size() +
+ images.size());
}
std::vector<ConstBufferEntry> const_buffers;
std::vector<GlobalBufferEntry> global_buffers;
- std::vector<TexelBufferEntry> texel_buffers;
+ std::vector<UniformTexelEntry> uniform_texels;
std::vector<SamplerEntry> samplers;
+ std::vector<StorageTexelEntry> storage_texels;
std::vector<ImageEntry> images;
std::set<u32> attributes;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -88,7 +91,8 @@ struct Specialization final {
u32 shared_memory_size{};
// Graphics specific
- std::optional<float> point_size{};
+ std::optional<float> point_size;
+ std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
};
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Unmap(u64 size);
- VkBuffer GetHandle() const {
+ VkBuffer Handle() const noexcept {
return *buffer;
}
+ u64 Address() const noexcept {
+ return 0;
+ }
+
private:
struct Watch final {
VKFenceWatch fence;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 55f43e61b..430031665 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
ci.pNext = nullptr;
ci.flags = 0;
ci.size = static_cast<VkDeviceSize>(host_memory_size);
- ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
- VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
ci.queueFamilyIndexCount = 0;
ci.pQueueFamilyIndices = nullptr;
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
ci.extent = {params.width, params.height, 1};
break;
case SurfaceTarget::Texture3D:
+ ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
ci.extent = {params.width, params.height, params.depth};
break;
case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
return ci;
}
+u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
+ return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
+ (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+}
+
} // Anonymous namespace
CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
}
// TODO(Rodrigo): Move this to a virtual function.
- main_view = CreateViewInner(
- ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels),
- true);
+ u32 num_layers = 1;
+ if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
+ num_layers = params.depth;
+ }
+ main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
}
CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
}
View CachedSurface::CreateView(const ViewParams& params) {
- return CreateViewInner(params, false);
-}
-
-View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
// TODO(Rodrigo): Add name decorations
- return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy);
+ return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
}
void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,38 +347,44 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
}
CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy)
+ const ViewParams& params)
: VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
- base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
- num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
- : VK_IMAGE_VIEW_TYPE_1D} {}
+ base_level{params.base_level}, num_levels{params.num_levels},
+ image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ base_layer = 0;
+ num_layers = 1;
+ base_slice = params.base_layer;
+ num_slices = params.num_layers;
+ } else {
+ base_layer = params.base_layer;
+ num_layers = params.num_layers;
+ }
+}
CachedSurfaceView::~CachedSurfaceView() = default;
-VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
- SwizzleSource z_source, SwizzleSource w_source) {
- const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
- if (last_image_view && last_swizzle == swizzle) {
+VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
+ SwizzleSource z_source, SwizzleSource w_source) {
+ const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
+ if (last_image_view && last_swizzle == new_swizzle) {
return last_image_view;
}
- last_swizzle = swizzle;
+ last_swizzle = new_swizzle;
- const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle);
+ const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
auto& image_view = entry->second;
if (!is_cache_miss) {
return last_image_view = *image_view;
}
- auto swizzle_x = MaxwellToVK::SwizzleSource(x_source);
- auto swizzle_y = MaxwellToVK::SwizzleSource(y_source);
- auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
- auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
-
+ std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
+ MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
// A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
- std::swap(swizzle_x, swizzle_z);
+ std::swap(swizzle[0], swizzle[2]);
}
// Games can sample depth or stencil values on textures. This is decided by the swizzle value on
@@ -395,11 +406,16 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
UNIMPLEMENTED();
}
- // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
- swizzle_x = VK_COMPONENT_SWIZZLE_R;
- swizzle_y = VK_COMPONENT_SWIZZLE_G;
- swizzle_z = VK_COMPONENT_SWIZZLE_B;
- swizzle_w = VK_COMPONENT_SWIZZLE_A;
+ // Make sure we sample the first component
+ std::transform(
+ swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
+ return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
+ });
+ }
+
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ASSERT(base_slice == 0);
+ ASSERT(num_slices == params.depth);
}
VkImageViewCreateInfo ci;
@@ -409,7 +425,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
ci.image = surface.GetImageHandle();
ci.viewType = image_view_type;
ci.format = surface.GetImage().GetFormat();
- ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
+ ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
ci.subresourceRange.aspectMask = aspect;
ci.subresourceRange.baseMipLevel = base_level;
ci.subresourceRange.levelCount = num_levels;
@@ -420,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
return last_image_view = *image_view;
}
+VkImageView CachedSurfaceView::GetAttachment() {
+ if (render_target) {
+ return *render_target;
+ }
+
+ VkImageViewCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+ ci.pNext = nullptr;
+ ci.flags = 0;
+ ci.image = surface.GetImageHandle();
+ ci.format = surface.GetImage().GetFormat();
+ ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
+ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
+ ci.subresourceRange.aspectMask = aspect_mask;
+ ci.subresourceRange.baseMipLevel = base_level;
+ ci.subresourceRange.levelCount = num_levels;
+ if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
+ ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
+ ci.subresourceRange.baseArrayLayer = base_slice;
+ ci.subresourceRange.layerCount = num_slices;
+ } else {
+ ci.viewType = image_view_type;
+ ci.subresourceRange.baseArrayLayer = base_layer;
+ ci.subresourceRange.layerCount = num_layers;
+ }
+ render_target = device.GetLogical().CreateImageView(ci);
+ return *render_target;
+}
+
VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
const VKDevice& device, VKResourceManager& resource_manager,
VKMemoryManager& memory_manager, VKScheduler& scheduler,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..807e26c8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,7 +91,6 @@ protected:
void DecorateSurfaceName();
View CreateView(const ViewParams& params) override;
- View CreateViewInner(const ViewParams& params, bool is_proxy);
private:
void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
class CachedSurfaceView final : public VideoCommon::ViewBase {
public:
explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
- const ViewParams& params, bool is_proxy);
+ const ViewParams& params);
~CachedSurfaceView();
- VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
+ VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ VkImageView GetAttachment();
bool IsSameSurface(const CachedSurfaceView& rhs) const {
return &surface == &rhs.surface;
}
- VkImageView GetHandle() {
- return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
- Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
- }
-
u32 GetWidth() const {
return params.GetMipWidth(base_level);
}
@@ -180,14 +176,6 @@ public:
}
private:
- static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source) {
- return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
- (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
- }
-
// Store a copy of these values to avoid double dereference when reading them
const SurfaceParams params;
const VkImage image;
@@ -196,15 +184,18 @@ private:
const VKDevice& device;
CachedSurface& surface;
- const u32 base_layer;
- const u32 num_layers;
const u32 base_level;
const u32 num_levels;
const VkImageViewType image_view_type;
+ u32 base_layer = 0;
+ u32 num_layers = 0;
+ u32 base_slice = 0;
+ u32 num_slices = 0;
VkImageView last_image_view = nullptr;
u32 last_swizzle = 0;
+ vk::ImageView render_target;
std::unordered_map<u32, vk::ImageView> view_cache;
};
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
}
void VKUpdateDescriptorQueue::Acquire() {
- entries.clear();
-}
+ // Minimum number of entries required.
+ // This is the maximum number of entries a single draw call might use.
+ static constexpr std::size_t MIN_ENTRIES = 0x400;
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
- VkDescriptorSet set) {
- if (payload.size() + entries.size() >= payload.max_size()) {
+ if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
payload.clear();
}
+ upload_start = payload.data() + payload.size();
+}
- // TODO(Rodrigo): Rework to write the payload directly
- const auto payload_start = payload.data() + payload.size();
- for (const auto& entry : entries) {
- if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
- payload.push_back(*image);
- } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
- payload.push_back(*buffer);
- } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
- payload.push_back(*texel);
- } else {
- UNREACHABLE();
- }
- }
-
- scheduler.Record(
- [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
- logical->UpdateDescriptorSet(set, update_template, payload_start);
- });
+void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
+ VkDescriptorSet set) {
+ const void* const data = upload_start;
+ const vk::Device* const logical = &device.GetLogical();
+ scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
+ logical->UpdateDescriptorSet(set, update_template, data);
+ });
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
class VKDevice;
class VKScheduler;
-class DescriptorUpdateEntry {
-public:
- explicit DescriptorUpdateEntry() {}
-
- DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
+struct DescriptorUpdateEntry {
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
- DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
+ DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
- DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
+ DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
-private:
union {
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkSampler sampler, VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
}
void AddImage(VkImageView image_view) {
- entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+ payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
}
void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
- entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+ payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- entries.emplace_back(texel_buffer);
+ payload.emplace_back(texel_buffer);
}
- VkImageLayout* GetLastImageLayout() {
- return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
+ VkImageLayout* LastImageLayout() {
+ return &payload.back().image.imageLayout;
}
-private:
- using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
+ const VkImageLayout* LastImageLayout() const {
+ return &payload.back().image.imageLayout;
+ }
+private:
const VKDevice& device;
VKScheduler& scheduler;
- boost::container::static_vector<Variant, 0x400> entries;
+ const DescriptorUpdateEntry* upload_start = nullptr;
boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
};
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..0d485a662 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
- return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties);
+ return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
+ X(vkEnumerateInstanceLayerProperties);
#undef X
}
@@ -725,8 +726,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
return supported == VK_TRUE;
}
-VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const
- noexcept {
+VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
VkSurfaceCapabilitiesKHR capabilities;
Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
return capabilities;
@@ -771,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
return properties;
}
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld) {
+ u32 num;
+ if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkLayerProperties> properties(num);
+ if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ return properties;
+}
+
} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..d56fdb3f9 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
PFN_vkCreateInstance vkCreateInstance;
PFN_vkDestroyInstance vkDestroyInstance;
PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
+ PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
PFN_vkCreateDevice vkCreateDevice;
@@ -779,7 +780,7 @@ public:
bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
- VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept;
+ VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
@@ -996,4 +997,7 @@ private:
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld);
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld);
+
} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index a75a5cc63..eeac328a6 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -255,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
Node op_b = Immediate(branch_case.cmp_value);
Node condition =
- GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+ GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
auto result = Conditional(condition, {n});
bb.push_back(result);
global_code.push_back(result);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ PredCondition cond;
+ bool bf;
+ bool ftz;
+ bool neg_a;
+ bool abs_a;
+ bool neg_b;
+ bool abs_b;
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ case OpCode::Id::HSET2_IMM:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ bf = instr.Bit(53);
+ ftz = instr.Bit(54);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(56);
+ abs_b = instr.Bit(54);
+ break;
+ case OpCode::Id::HSET2_R:
+ cond = instr.hsetp2.reg.cond;
+ bf = instr.Bit(49);
+ ftz = instr.Bit(50);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(31);
+ abs_b = instr.Bit(30);
+ break;
+ default:
+ UNREACHABLE();
}
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
-
- Node op_b = [&]() {
+ Node op_b = [this, instr, opcode] {
switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ // Inform as unimplemented as this is not tested.
+ UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HSET2_R:
return GetRegister(instr.gpr20);
+ case OpCode::Id::HSET2_IMM:
+ return UnpackHalfImmediate(instr, true);
default:
UNREACHABLE();
- return Immediate(0);
+ return Node{};
}
}();
- op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
- op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
- const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+ if (!ftz) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
+
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_R:
+ op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+ [[fallthrough]];
+ case OpCode::Id::HSET2_C:
+ op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+ break;
+ default:
+ break;
+ }
- const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
+ Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+ Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
// HSET2 operates on each half float in the pack.
std::array<Node, 2> values;
for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
- const Node true_value = Immediate(raw_value << (i * 16));
- const Node false_value = Immediate(0);
-
- const Node comparison =
- Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- const Node predicate = Operation(combiner, comparison, second_pred);
+ const u32 raw_value = bf ? 0x3c00 : 0xffff;
+ Node true_value = Immediate(raw_value << (i * 16));
+ Node false_value = Immediate(0);
+ Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+ Node predicate = Operation(combiner, comparison, second_pred);
values[i] =
- Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+ Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
}
- const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
- SetRegister(bb, instr.gpr0, value);
+ Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+ SetRegister(bb, instr.gpr0, move(value));
return pc;
}
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..07778dc3e 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
break;
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
+ case TextureFormat::BF10GF11RF11:
if (component == 0) {
return descriptor.b_type;
}
@@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
}
break;
}
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return ComponentType::FLOAT;
}
@@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 6;
}
return 0;
+ case TextureFormat::BF10GF11RF11:
+ if (component == 1 || component == 2) {
+ return 11;
+ }
+ if (component == 0) {
+ return 10;
+ }
+ return 0;
case TextureFormat::G8R24:
if (component == 0) {
return 8;
@@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
- default:
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
- return 0;
}
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return 0;
}
std::size_t GetImageComponentMask(TextureFormat format) {
@@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R32_B24G8:
case TextureFormat::B5G6R5:
case TextureFormat::B6G5R5:
+ case TextureFormat::BF10GF11RF11:
return std::size_t{R | G | B};
case TextureFormat::R32_G32:
case TextureFormat::R16_G16:
@@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R8:
case TextureFormat::R1:
return std::size_t{R};
- default:
- UNIMPLEMENTED_MSG("texture format not implement={}", format);
- return std::size_t{R | G | B | A};
}
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return std::size_t{R | G | B | A};
}
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
return {std::move(original_value), true};
}
default:
- UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
+ UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
return {std::move(original_value), true};
}
}
@@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
default:
break;
}
- UNIMPLEMENTED_MSG("Unimplemented operation={} type={}",
+ UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
static_cast<u64>(instr.suatom_d.operation.Value()),
static_cast<u64>(instr.suatom_d.operation_type.Value()));
return OperationCode::AtomicImageAdd;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9392f065b..63adbc4a3 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::RED: {
UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
- UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
@@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
Node value = GetRegister(instr.gpr0);
- bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+ bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
break;
}
case OpCode::Id::ATOM: {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::YNegate);
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
- return Immediate(0U);
+ return Immediate(0x00ff'0000U);
case SystemVariable::WscaleFactorXY:
UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
return Immediate(0U);
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::WorkGroupIdY);
case SystemVariable::CtaIdZ:
return Operation(OperationCode::WorkGroupIdZ);
+ case SystemVariable::EqMask:
+ case SystemVariable::LtMask:
+ case SystemVariable::LeMask:
+ case SystemVariable::GtMask:
+ case SystemVariable::GeMask:
+ uses_warps = true;
+ switch (instr.sys20) {
+ case SystemVariable::EqMask:
+ return Operation(OperationCode::ThreadEqMask);
+ case SystemVariable::LtMask:
+ return Operation(OperationCode::ThreadLtMask);
+ case SystemVariable::LeMask:
+ return Operation(OperationCode::ThreadLeMask);
+ case SystemVariable::GtMask:
+ return Operation(OperationCode::ThreadGtMask);
+ case SystemVariable::GeMask:
+ return Operation(OperationCode::ThreadGeMask);
+ default:
+ UNREACHABLE();
+ return Immediate(0u);
+ }
default:
UNIMPLEMENTED_MSG("Unhandled system move: {}",
static_cast<u32>(instr.sys20.Value()));
@@ -272,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
break;
}
+ case OpCode::Id::BAR: {
+ UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
+ bb.push_back(Operation(OperationCode::Barrier));
+ break;
+ }
case OpCode::Id::MEMBAR: {
- UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
- bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+ const OperationCode type = [instr] {
+ switch (instr.membar.type) {
+ case Tegra::Shader::MembarType::CTA:
+ return OperationCode::MemoryBarrierGroup;
+ case Tegra::Shader::MembarType::GL:
+ return OperationCode::MemoryBarrierGlobal;
+ default:
+ UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+ return OperationCode::MemoryBarrierGlobal;
+ }
+ }();
+ bb.push_back(Operation(type));
break;
}
case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+ SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
if (info.IsComplete()) {
return info;
}
- const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
- : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
SamplerInfo sampler_info) {
- const auto offset = static_cast<u32>(sampler.index.Value());
- const auto info = GetSamplerInfo(sampler_info, offset);
+ const u32 offset = static_cast<u32>(sampler.index.Value());
+ const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
const Node sampler_register = GetRegister(reg);
const auto [base_node, tracked_sampler_info] =
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_node != nullptr);
- if (base_node == nullptr) {
+ if (!base_node) {
+ UNREACHABLE();
return std::nullopt;
}
- if (const auto bindless_sampler_info =
- std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
- const u32 buffer = bindless_sampler_info->GetIndex();
- const u32 offset = bindless_sampler_info->GetOffset();
- info = GetSamplerInfo(info, offset, buffer);
+ if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+ const u32 buffer = sampler_info->index;
+ const u32 offset = sampler_info->offset;
+ info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
- [buffer = buffer, offset = offset](const Sampler& entry) {
+ [buffer, offset](const Sampler& entry) {
return entry.buffer == buffer && entry.offset == offset;
});
if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
*info.is_shadow, *info.is_buffer, false);
}
- if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
- const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
- index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
- info = GetSamplerInfo(info, base_offset);
+ if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+ const std::pair indices = sampler_info->indices;
+ const std::pair offsets = sampler_info->offsets;
+ info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+ // Try to use an already created sampler if it exists
+ const auto it = std::find_if(
+ used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+ return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+ indices == std::pair{entry.buffer, entry.secondary_buffer};
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+ it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+ return *it;
+ }
+
+ // Otherwise create a new mapping for this sampler
+ const u32 next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+ *info.is_shadow, *info.is_buffer);
+ }
+ if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+ const u32 base_offset = sampler_info->base_offset / 4;
+ index_var = GetCustomVariable(sampler_info->bindless_var);
+ info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
// If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 6191ffba1..c83dc6615 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -97,19 +97,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
}
case Tegra::Shader::XmadMode::CSfu: {
- const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
- op_a, Immediate(0));
- const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
- op_b, Immediate(0));
+ const Node comp_a =
+ GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
+ const Node comp_b =
+ GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
const Node comp_minus_a = GetPredicateComparisonInteger(
- PredCondition::NotEqual, is_signed_a,
+ PredCondition::NE, is_signed_a,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
Immediate(0x80000000)),
Immediate(0));
const Node comp_minus_b = GetPredicateComparisonInteger(
- PredCondition::NotEqual, is_signed_b,
+ PredCondition::NE, is_signed_b,
SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
Immediate(0x80000000)),
Immediate(0));
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 074f21691..5071c83ca 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add
u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
const ProgramCode& code_b) {
- u64 unique_identifier = boost::hash_value(code);
+ size_t unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
boost::hash_combine(unique_identifier, boost::hash_value(code_b));
}
- return unique_identifier;
+ return static_cast<u64>(unique_identifier);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 601c822d2..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -110,13 +110,20 @@ enum class OperationCode {
LogicalPick2, /// (bool2 pair, uint index) -> bool
LogicalAnd2, /// (bool2 a) -> bool
- LogicalFLessThan, /// (float a, float b) -> bool
- LogicalFEqual, /// (float a, float b) -> bool
- LogicalFLessEqual, /// (float a, float b) -> bool
- LogicalFGreaterThan, /// (float a, float b) -> bool
- LogicalFNotEqual, /// (float a, float b) -> bool
- LogicalFGreaterEqual, /// (float a, float b) -> bool
- LogicalFIsNan, /// (float a) -> bool
+ LogicalFOrdLessThan, /// (float a, float b) -> bool
+ LogicalFOrdEqual, /// (float a, float b) -> bool
+ LogicalFOrdLessEqual, /// (float a, float b) -> bool
+ LogicalFOrdGreaterThan, /// (float a, float b) -> bool
+ LogicalFOrdNotEqual, /// (float a, float b) -> bool
+ LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
+ LogicalFOrdered, /// (float a, float b) -> bool
+ LogicalFUnordered, /// (float a, float b) -> bool
+ LogicalFUnordLessThan, /// (float a, float b) -> bool
+ LogicalFUnordEqual, /// (float a, float b) -> bool
+ LogicalFUnordLessEqual, /// (float a, float b) -> bool
+ LogicalFUnordGreaterThan, /// (float a, float b) -> bool
+ LogicalFUnordNotEqual, /// (float a, float b) -> bool
+ LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
LogicalILessThan, /// (int a, int b) -> bool
LogicalIEqual, /// (int a, int b) -> bool
@@ -219,9 +226,16 @@ enum class OperationCode {
VoteEqual, /// (bool) -> bool
ThreadId, /// () -> uint
+ ThreadEqMask, /// () -> uint
+ ThreadGeMask, /// () -> uint
+ ThreadGtMask, /// () -> uint
+ ThreadLeMask, /// () -> uint
+ ThreadLtMask, /// () -> uint
ShuffleIndexed, /// (uint value, uint index) -> uint
- MemoryBarrierGL, /// () -> void
+ Barrier, /// () -> void
+ MemoryBarrierGroup, /// () -> void
+ MemoryBarrierGlobal, /// () -> void
Amount,
};
@@ -261,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;
struct Sampler {
@@ -274,63 +289,51 @@ struct Sampler {
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_buffer{is_buffer}, is_indexed{is_indexed} {}
+ /// Separate sampler constructor
+ constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+ bool is_buffer)
+ : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+ buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
/// Bindless samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
: index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
- u32 index = 0; ///< Emulated index given for the this sampler.
- u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
- u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
- u32 size = 1; ///< Size of the sampler.
+ u32 index = 0; ///< Emulated index given for this sampler.
+ u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
+ u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+ u32 buffer = 0; ///< Buffer where the bindless sampler is read.
+ u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+ u32 size = 1; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
- bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
- bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
- bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
+ bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
+ bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
+ bool is_separated = false; ///< Whether the image and sampler are separated or not.
};
/// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
- explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
- : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetBaseOffset() const {
- return base_offset;
- }
-
- constexpr u32 GetIndexVar() const {
- return bindless_var;
- }
-
-private:
+struct ArraySamplerNode {
u32 index;
u32 base_offset;
u32 bindless_var;
};
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
- explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
- constexpr u32 GetIndex() const {
- return index;
- }
-
- constexpr u32 GetOffset() const {
- return offset;
- }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+ std::pair<u32, u32> indices;
+ std::pair<u32, u32> offsets;
+};
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
u32 index;
u32 offset;
};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
template <typename T, typename... Args>
TrackSampler MakeTrackSampler(Args&&... args) {
static_assert(std::is_convertible_v<T, TrackSamplerData>);
- return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+ return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
}
template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
return value;
}
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+ SeparateSamplerKey key;
+ key.buffers = buffers;
+ key.offsets = offsets;
+ const auto iter = separate_samplers.find(key);
+ if (iter != separate_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+
+ const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+ const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+ const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+ separate_samplers.emplace(key, value);
+ return value;
+}
+
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
u32 offset) {
const std::pair key = {buffer, offset};
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
namespace VideoCommon::Shader {
+struct SeparateSamplerKey {
+ std::pair<u32, u32> buffers;
+ std::pair<u32, u32> offsets;
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> {
+ std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+ return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+ key.offsets.second);
+ }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
+ bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+ const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+ return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+ }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+ std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
@@ -73,6 +104,9 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+ std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key.
@@ -128,6 +162,7 @@ private:
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys;
BoundSamplerMap bound_samplers;
+ SeparateSamplerMap separate_samplers;
BindlessSamplerMap bindless_samplers;
u32 bound_buffer;
GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 822674926..e322c3402 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
@@ -243,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
+ if (condition == PredCondition::T) {
+ return GetPredicate(true);
+ } else if (condition == PredCondition::F) {
+ return GetPredicate(false);
+ }
+
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
- std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
- std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+ OperationCode(0),
+ OperationCode::LogicalFOrdLessThan, // LT
+ OperationCode::LogicalFOrdEqual, // EQ
+ OperationCode::LogicalFOrdLessEqual, // LE
+ OperationCode::LogicalFOrdGreaterThan, // GT
+ OperationCode::LogicalFOrdNotEqual, // NE
+ OperationCode::LogicalFOrdGreaterEqual, // GE
+ OperationCode::LogicalFOrdered, // NUM
+ OperationCode::LogicalFUnordered, // NAN
+ OperationCode::LogicalFUnordLessThan, // LTU
+ OperationCode::LogicalFUnordEqual, // EQU
+ OperationCode::LogicalFUnordLessEqual, // LEU
+ OperationCode::LogicalFUnordGreaterThan, // GTU
+ OperationCode::LogicalFUnordNotEqual, // NEU
+ OperationCode::LogicalFUnordGreaterEqual, // GEU
};
+ const std::size_t index = static_cast<std::size_t>(condition);
+ ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
- const auto comparison =
- std::find_if(comparison_table.cbegin(), comparison_table.cend(),
- [condition](const auto entry) { return condition == entry.first; });
- UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
- "Unknown predicate comparison operation");
-
- Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
-
- if (condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan ||
- condition == PredCondition::LessEqualWithNan ||
- condition == PredCondition::GreaterThanWithNan ||
- condition == PredCondition::GreaterEqualWithNan) {
- predicate = Operation(OperationCode::LogicalOr, predicate,
- Operation(OperationCode::LogicalFIsNan, op_a));
- predicate = Operation(OperationCode::LogicalOr, predicate,
- Operation(OperationCode::LogicalFIsNan, op_b));
- }
-
- return predicate;
+ return Operation(comparison_table[index], op_a, op_b);
}
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
- std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
- std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+ std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
+ std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
};
const auto comparison =
@@ -301,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
- Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
- std::move(op_b));
-
- UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
- condition == PredCondition::NotEqualWithNan ||
- condition == PredCondition::LessEqualWithNan ||
- condition == PredCondition::GreaterThanWithNan ||
- condition == PredCondition::GreaterEqualWithNan,
- "NaN comparisons for integers are not implemented");
- return predicate;
+ return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
+ std::move(op_b));
}
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
Node op_b) {
static constexpr std::array comparison_table{
- std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
- std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
- std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
- std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
- std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
- std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
- std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
- std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
- std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
- std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+ std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
+ std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
+ std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
+ std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
+ std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
+ std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
+ std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
+ std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
+ std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
+ std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
+ std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
};
const auto comparison =
@@ -397,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc
if (!sets_cc) {
return;
}
- Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
+ Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
/// Queries the missing sampler info from the execution context.
- SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
- std::optional<u32> buffer = std::nullopt);
+ SamplerInfo GetSamplerInfo(SamplerInfo info,
+ std::optional<Tegra::Engines::SamplerDescriptor> sampler);
/// Accesses a texture sampler.
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
- std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor);
+ std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor);
+
+ std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+ const OperationNode& operation,
+ Node gpr, Node base_offset,
+ Node tracked, const NodeBlock& code,
+ s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
namespace VideoCommon::Shader {
namespace {
+
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
operation->SetAmendIndex(amend_index);
return true;
- } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+ }
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
conditional->SetAmendIndex(amend_index);
return true;
}
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
} // Anonymous namespace
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
- s64 cursor) {
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+ s64 cursor) {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+ const u32 cbuf_index = cbuf->GetIndex();
+
// Constant buffer found, test if it's an immediate
const auto& offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- auto track =
- MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+ auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
return {tracked, track};
}
if (const auto operation = std::get_if<OperationNode>(&*offset)) {
const u32 bound_buffer = registry.GetBoundBuffer();
- if (bound_buffer != cbuf->GetIndex()) {
+ if (bound_buffer != cbuf_index) {
return {};
}
- const auto pair = DecoupleIndirectRead(*operation);
- if (!pair) {
- return {};
+ if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+ auto [gpr, base_offset] = *pair;
+ return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+ code, cursor);
}
- auto [gpr, base_offset] = *pair;
- const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- const auto& gpu_driver = registry.AccessGuestDriverProfile();
- const u32 bindless_cv = NewCustomVariable();
- Node op =
- Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
- const Node cv_node = GetCustomVariable(bindless_cv);
- Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
- const std::size_t amend_index = DeclareAmend(std::move(amend_op));
- AmendNodeCv(amend_index, code[cursor]);
- // TODO Implement Bindless Index custom variable
- auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
- offset_inm->GetValue(), bindless_cv);
- return {tracked, track};
}
return {};
}
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return TrackBindlessSampler(source, code, new_cursor);
}
if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
- for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
- if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
- std::get<0>(found)) {
- // Cbuf found in operand.
+ const OperationNode& op = *operation;
+
+ const OperationCode opcode = operation->GetCode();
+ if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+ ASSERT(op.GetOperandsCount() == 2);
+ auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+ auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+ if (node_a && node_b) {
+ auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+ std::pair{offset_a, offset_b});
+ return {tracked, std::move(track)};
+ }
+ }
+ std::size_t i = op.GetOperandsCount();
+ while (i--) {
+ if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
+ // Constant buffer found in operand.
return found;
}
}
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return {};
}
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+ const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+ const NodeBlock& code, s64 cursor) {
+ const auto offset_imm = std::get<ImmediateNode>(*base_offset);
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
+ const u32 bindless_cv = NewCustomVariable();
+ const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+ Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+ Node cv_node = GetCustomVariable(bindless_cv);
+ Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+ const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+ AmendNodeCv(amend_index, code[cursor]);
+
+ // TODO: Implement bindless index custom variable
+ auto track =
+ MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+ return {tracked, track};
+}
+
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..2dd270e99
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+ static constexpr u64 PAGE_BITS = 14;
+
+ struct Entry {
+ VAddr addr_start;
+ VAddr addr_end;
+ T* data;
+
+ bool is_memory_marked = true;
+
+ constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+ return start < addr_end && addr_start < end;
+ }
+ };
+
+public:
+ virtual ~ShaderCache() = default;
+
+ /// @brief Removes shaders inside a given region
+ /// @note Checks for ranges
+ /// @param addr Start address of the invalidation
+ /// @param size Number of bytes of the invalidation
+ void InvalidateRegion(VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ RemovePendingShaders();
+ }
+
+ /// @brief Unmarks a memory region as cached and marks it for removal
+ /// @param addr Start address of the CPU write operation
+ /// @param size Number of bytes of the CPU write operation
+ void OnCPUWrite(VAddr addr, std::size_t size) {
+ std::lock_guard lock{invalidation_mutex};
+ InvalidatePagesInRegion(addr, size);
+ }
+
+ /// @brief Flushes delayed removal operations
+ void SyncGuestHost() {
+ std::scoped_lock lock{invalidation_mutex};
+ RemovePendingShaders();
+ }
+
+ /// @brief Tries to obtain a cached shader starting in a given address
+ /// @note Doesn't check for ranges, the given address has to be the start of the shader
+ /// @param addr Start address of the shader, this doesn't cache for region
+ /// @return Pointer to a valid shader, nullptr when nothing is found
+ T* TryGet(VAddr addr) const {
+ std::scoped_lock lock{lookup_mutex};
+
+ const auto it = lookup_cache.find(addr);
+ if (it == lookup_cache.end()) {
+ return nullptr;
+ }
+ return it->second->data;
+ }
+
+protected:
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+ /// @brief Register in the cache a given entry
+ /// @param data Shader to store in the cache
+ /// @param addr Start address of the shader that will be registered
+ /// @param size Size in bytes of the shader
+ void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+ std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+ const VAddr addr_end = addr + size;
+ Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ invalidation_cache[page].push_back(entry);
+ }
+
+ storage.push_back(std::move(data));
+
+ rasterizer.UpdatePagesCachedCount(addr, size, 1);
+ }
+
+ /// @brief Called when a shader is going to be removed
+ /// @param shader Shader that will be removed
+ /// @pre invalidation_cache is locked
+ /// @pre lookup_mutex is locked
+ virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+ /// @brief Invalidate pages in a given region
+ /// @pre invalidation_mutex is locked
+ void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+ const VAddr addr_end = addr + size;
+ const u64 page_end = addr_end >> PAGE_BITS;
+ for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+ const auto it = invalidation_cache.find(page);
+ if (it == invalidation_cache.end()) {
+ continue;
+ }
+
+ std::vector<Entry*>& entries = it->second;
+ InvalidatePageEntries(entries, addr, addr_end);
+
+ // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
+ if (entries.empty()) {
+ invalidation_cache.erase(it);
+ }
+ }
+ }
+
+ /// @brief Remove shaders marked for deletion
+ /// @pre invalidation_mutex is locked
+ void RemovePendingShaders() {
+ if (marked_for_removal.empty()) {
+ return;
+ }
+ std::scoped_lock lock{lookup_mutex};
+
+ std::vector<T*> removed_shaders;
+ removed_shaders.reserve(marked_for_removal.size());
+
+ for (Entry* const entry : marked_for_removal) {
+ if (lookup_cache.erase(entry->addr_start) > 0) {
+ removed_shaders.push_back(entry->data);
+ }
+ }
+ marked_for_removal.clear();
+
+ if (!removed_shaders.empty()) {
+ RemoveShadersFromStorage(std::move(removed_shaders));
+ }
+ }
+
+ /// @brief Invalidates entries in a given range for the passed page
+ /// @param entries Vector of entries in the page, it will be modified on overlaps
+ /// @param addr Start address of the invalidation
+ /// @param addr_end Non-inclusive end address of the invalidation
+ /// @pre invalidation_mutex is locked
+ void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+ auto it = entries.begin();
+ while (it != entries.end()) {
+ Entry* const entry = *it;
+ if (!entry->Overlaps(addr, addr_end)) {
+ ++it;
+ continue;
+ }
+ UnmarkMemory(entry);
+ marked_for_removal.push_back(entry);
+
+ it = entries.erase(it);
+ }
+ }
+
+ /// @brief Unmarks an entry from the rasterizer cache
+ /// @param entry Entry to unmark from memory
+ void UnmarkMemory(Entry* entry) {
+ if (!entry->is_memory_marked) {
+ return;
+ }
+ entry->is_memory_marked = false;
+
+ const VAddr addr = entry->addr_start;
+ const std::size_t size = entry->addr_end - addr;
+ rasterizer.UpdatePagesCachedCount(addr, size, -1);
+ }
+
+ /// @brief Removes a vector of shaders from a list
+ /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
+ /// @pre invalidation_mutex is locked
+ /// @pre lookup_mutex is locked
+ void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+ // Remove duplicates
+ std::sort(removed_shaders.begin(), removed_shaders.end());
+ removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
+ removed_shaders.end());
+
+ // Now that there are no duplicates, we can notify removals
+ for (T* const shader : removed_shaders) {
+ OnShaderRemoval(shader);
+ }
+
+ // Remove them from the cache
+ const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
+ return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
+ removed_shaders.end();
+ };
+ storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+ }
+
+ /// @brief Creates a new entry in the lookup cache and returns its pointer
+ /// @pre lookup_mutex is locked
+ Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+ auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+ Entry* const entry_pointer = entry.get();
+
+ lookup_cache.emplace(addr, std::move(entry));
+ return entry_pointer;
+ }
+
+ VideoCore::RasterizerInterface& rasterizer;
+
+ mutable std::mutex lookup_mutex;
+ std::mutex invalidation_mutex;
+
+ std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+ std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+ std::vector<std::unique_ptr<T>> storage;
+ std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index cc7181229..bbe93903c 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -145,6 +145,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::RG8U;
case Tegra::RenderTargetFormat::RG8_SNORM:
return PixelFormat::RG8S;
+ case Tegra::RenderTargetFormat::RG8_UINT:
+ return PixelFormat::RG8UI;
case Tegra::RenderTargetFormat::R16_FLOAT:
return PixelFormat::R16F;
case Tegra::RenderTargetFormat::R16_UNORM:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index e0acd44d3..6da6a1b97 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -57,51 +57,52 @@ enum class PixelFormat {
RGBA8_SRGB = 39,
RG8U = 40,
RG8S = 41,
- RG32UI = 42,
- RGBX16F = 43,
- R32UI = 44,
- R32I = 45,
- ASTC_2D_8X8 = 46,
- ASTC_2D_8X5 = 47,
- ASTC_2D_5X4 = 48,
- BGRA8_SRGB = 49,
- DXT1_SRGB = 50,
- DXT23_SRGB = 51,
- DXT45_SRGB = 52,
- BC7U_SRGB = 53,
- R4G4B4A4U = 54,
- ASTC_2D_4X4_SRGB = 55,
- ASTC_2D_8X8_SRGB = 56,
- ASTC_2D_8X5_SRGB = 57,
- ASTC_2D_5X4_SRGB = 58,
- ASTC_2D_5X5 = 59,
- ASTC_2D_5X5_SRGB = 60,
- ASTC_2D_10X8 = 61,
- ASTC_2D_10X8_SRGB = 62,
- ASTC_2D_6X6 = 63,
- ASTC_2D_6X6_SRGB = 64,
- ASTC_2D_10X10 = 65,
- ASTC_2D_10X10_SRGB = 66,
- ASTC_2D_12X12 = 67,
- ASTC_2D_12X12_SRGB = 68,
- ASTC_2D_8X6 = 69,
- ASTC_2D_8X6_SRGB = 70,
- ASTC_2D_6X5 = 71,
- ASTC_2D_6X5_SRGB = 72,
- E5B9G9R9F = 73,
+ RG8UI = 42,
+ RG32UI = 43,
+ RGBX16F = 44,
+ R32UI = 45,
+ R32I = 46,
+ ASTC_2D_8X8 = 47,
+ ASTC_2D_8X5 = 48,
+ ASTC_2D_5X4 = 49,
+ BGRA8_SRGB = 50,
+ DXT1_SRGB = 51,
+ DXT23_SRGB = 52,
+ DXT45_SRGB = 53,
+ BC7U_SRGB = 54,
+ R4G4B4A4U = 55,
+ ASTC_2D_4X4_SRGB = 56,
+ ASTC_2D_8X8_SRGB = 57,
+ ASTC_2D_8X5_SRGB = 58,
+ ASTC_2D_5X4_SRGB = 59,
+ ASTC_2D_5X5 = 60,
+ ASTC_2D_5X5_SRGB = 61,
+ ASTC_2D_10X8 = 62,
+ ASTC_2D_10X8_SRGB = 63,
+ ASTC_2D_6X6 = 64,
+ ASTC_2D_6X6_SRGB = 65,
+ ASTC_2D_10X10 = 66,
+ ASTC_2D_10X10_SRGB = 67,
+ ASTC_2D_12X12 = 68,
+ ASTC_2D_12X12_SRGB = 69,
+ ASTC_2D_8X6 = 70,
+ ASTC_2D_8X6_SRGB = 71,
+ ASTC_2D_6X5 = 72,
+ ASTC_2D_6X5_SRGB = 73,
+ E5B9G9R9F = 74,
MaxColorFormat,
// Depth formats
- Z32F = 74,
- Z16 = 75,
+ Z32F = 75,
+ Z16 = 76,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 76,
- S8Z24 = 77,
- Z32FS8 = 78,
+ Z24S8 = 77,
+ S8Z24 = 78,
+ Z32FS8 = 79,
MaxDepthStencilFormat,
@@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // RGBA8_SRGB
0, // RG8U
0, // RG8S
+ 0, // RG8UI
0, // RG32UI
0, // RGBX16F
0, // R32UI
@@ -269,6 +271,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // RGBA8_SRGB
1, // RG8U
1, // RG8S
+ 1, // RG8UI
1, // RG32UI
1, // RGBX16F
1, // R32UI
@@ -359,6 +362,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // RGBA8_SRGB
1, // RG8U
1, // RG8S
+ 1, // RG8UI
1, // RG32UI
1, // RGBX16F
1, // R32UI
@@ -449,6 +453,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
32, // RGBA8_SRGB
16, // RG8U
16, // RG8S
+ 16, // RG8UI
64, // RG32UI
64, // RGBX16F
32, // R32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 25d2ee2e8..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 76> DefinitionTable = {{
+constexpr std::array<Table, 78> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -60,6 +60,7 @@ constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
{TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
+ {TextureFormat::G8R8, C, UINT, UINT, UINT, UINT, PixelFormat::RG8UI},
{TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
{TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
@@ -97,6 +98,7 @@ constexpr std::array<Table, 76> DefinitionTable = {{
{TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
{TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
{TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
+ {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
{TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
{TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
}
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
+ if (layer >= params.depth) {
+ return {};
+ }
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
@@ -248,12 +251,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
// Use an extra temporal buffer
auto& tmp_buffer = staging_cache.GetBuffer(1);
- // Special case for 3D Texture Segments
- const bool must_read_current_data =
- params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
tmp_buffer.resize(guest_memory_size);
host_ptr = tmp_buffer.data();
- if (must_read_current_data) {
+
+ if (params.target == SurfaceTarget::Texture3D) {
+ // Special case for 3D texture segments
memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
}
bool IsProtected() const {
- // Only 3D Slices are to be protected
- return is_target && params.block_depth > 0;
+ // Only 3D slices are to be protected
+ return is_target && params.target == SurfaceTarget::Texture3D;
}
bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
}
+ TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
+ return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
+ base_level, num_levels));
+ }
+
std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
const GPUVAddr view_addr,
const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
const std::size_t candidate_size) {
if (params.target == SurfaceTarget::Texture3D ||
- (params.num_levels == 1 && !params.is_layered) ||
- view_params.target == SurfaceTarget::Texture3D) {
+ view_params.target == SurfaceTarget::Texture3D ||
+ (params.num_levels == 1 && !params.is_layered)) {
return {};
}
const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..0b2b2b8c4 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.num_levels = 1;
params.emulated_levels = 1;
- const bool is_layered = config.layers > 1 && params.block_depth == 0;
- params.is_layered = is_layered;
- params.depth = is_layered ? config.layers.Value() : 1;
- params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ if (config.memory_layout.is_3d != 0) {
+ params.depth = config.layers.Value();
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture3D;
+ } else if (config.layers > 1) {
+ params.depth = config.layers.Value();
+ params.is_layered = true;
+ params.target = SurfaceTarget::Texture2DArray;
+ } else {
+ params.depth = 1;
+ params.is_layered = false;
+ params.target = SurfaceTarget::Texture2D;
+ }
return params;
}
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
params.width = config.width;
params.height = config.height;
params.pitch = config.pitch;
- // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+ // TODO(Rodrigo): Try to guess texture arrays from parameters
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
params.num_levels = 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d6efc34b2..6207d8dfe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
#include <unordered_map>
#include <vector>
+#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
@@ -23,6 +24,7 @@
#include "core/core.h"
#include "core/memory.h"
#include "core/settings.h"
+#include "video_core/compatible_formats.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
@@ -46,13 +48,14 @@ class RasterizerInterface;
namespace VideoCommon {
+using VideoCore::Surface::FormatCompatibility;
using VideoCore::Surface::PixelFormat;
-
using VideoCore::Surface::SurfaceTarget;
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView>
class TextureCache {
+ using VectorSurface = boost::container::small_vector<TSurface, 1>;
public:
void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -296,30 +299,30 @@ public:
const GPUVAddr src_gpu_addr = src_config.Address();
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
- const std::optional<VAddr> dst_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
- std::pair<TSurface, TView> dst_surface =
- GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
- std::pair<TSurface, TView> src_surface =
- GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
- ImageBlit(src_surface.second, dst_surface.second, copy_config);
+
+ const auto& memory_manager = system.GPU().MemoryManager();
+ const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
+ const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+ std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
+ TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
+ ImageBlit(src_surface, dst_surface.second, copy_config);
dst_surface.first->MarkAsModified(true, Tick());
}
- TSurface TryFindFramebufferSurface(VAddr addr) {
+ TSurface TryFindFramebufferSurface(VAddr addr) const {
if (!addr) {
return nullptr;
}
const VAddr page = addr >> registry_page_bits;
- std::vector<TSurface>& list = registry[page];
- for (auto& surface : list) {
- if (surface->GetCpuAddr() == addr) {
- return surface;
- }
+ const auto it = registry.find(page);
+ if (it == registry.end()) {
+ return nullptr;
}
- return nullptr;
+ const auto& list = it->second;
+ const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
+ return surface->GetCpuAddr() == addr;
+ });
+ return found != list.end() ? *found : nullptr;
}
u64 Tick() {
@@ -498,18 +501,18 @@ private:
* @param untopological Indicates to the recycler that the texture has no way
* to match the overlaps due to topological reasons.
**/
- RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
+ RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::IsGPULevelExtreme()) {
return RecycleStrategy::Flush;
}
// 3D Textures decision
- if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
+ if (params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
for (const auto& s : overlaps) {
const auto& s_params = s->GetSurfaceParams();
- if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
+ if (s_params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush;
}
}
@@ -538,9 +541,8 @@ private:
* @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons.
**/
- std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
- const SurfaceParams& params, const GPUVAddr gpu_addr,
- const bool preserve_contents,
+ std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
+ const GPUVAddr gpu_addr, const bool preserve_contents,
const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) {
@@ -594,7 +596,7 @@ private:
} else {
new_surface = GetUncachedSurface(gpu_addr, params);
}
- const auto& final_params = new_surface->GetSurfaceParams();
+ const SurfaceParams& final_params = new_surface->GetSurfaceParams();
if (cr_params.type != final_params.type) {
if (Settings::IsGPULevelExtreme()) {
BufferCopy(current_surface, new_surface);
@@ -602,7 +604,7 @@ private:
} else {
std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
for (auto& brick : bricks) {
- ImageCopy(current_surface, new_surface, brick);
+ TryCopyImage(current_surface, new_surface, brick);
}
}
Unregister(current_surface);
@@ -650,47 +652,65 @@ private:
* @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface.
**/
- std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
const SurfaceParams& params,
- const GPUVAddr gpu_addr) {
+ GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) {
- return {};
+ return std::nullopt;
}
- bool modified = false;
+ const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- u32 passed_tests = 0;
+
+ if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
+ LoadSurface(new_surface);
+ for (const auto& surface : overlaps) {
+ Unregister(surface);
+ }
+ Register(new_surface);
+ return {{new_surface, new_surface->GetMainView()}};
+ }
+
+ std::size_t passed_tests = 0;
for (auto& surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.is_layered || src_params.num_levels > 1) {
- // We send this cases to recycle as they are more complex to handle
- return {};
- }
- const std::size_t candidate_size = surface->GetSizeInBytes();
- auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
+ const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
if (!mipmap_layer) {
continue;
}
- const auto [layer, mipmap] = *mipmap_layer;
- if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
+ const auto [base_layer, base_mipmap] = *mipmap_layer;
+ if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
continue;
}
- modified |= surface->IsModified();
- // Now we got all the data set up
- const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
- const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
- const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
- passed_tests++;
- ImageCopy(surface, new_surface, copy_params);
+ ++passed_tests;
+
+ // Copy all mipmaps and layers
+ const u32 block_width = params.GetDefaultBlockWidth();
+ const u32 block_height = params.GetDefaultBlockHeight();
+ for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
+ const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
+ const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
+ if (width < block_width || height < block_height) {
+ // Current APIs forbid copying small compressed textures, avoid errors
+ break;
+ }
+ const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
+ src_params.depth);
+ TryCopyImage(surface, new_surface, copy_params);
+ }
}
if (passed_tests == 0) {
- return {};
+ return std::nullopt;
+ }
+ if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
// In Accurate GPU all tests should pass, else we recycle
- } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
- return {};
+ return std::nullopt;
}
+
+ const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
for (const auto& surface : overlaps) {
Unregister(surface);
}
+
new_surface->MarkAsModified(modified, Tick());
Register(new_surface);
return {{new_surface, new_surface->GetMainView()}};
@@ -708,53 +728,11 @@ private:
* @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank.
*/
- std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
+ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params,
- const GPUVAddr gpu_addr,
- const VAddr cpu_addr,
+ GPUVAddr gpu_addr, VAddr cpu_addr,
bool preserve_contents) {
- if (params.target == SurfaceTarget::Texture3D) {
- bool failed = false;
- if (params.num_levels > 1) {
- // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
- return std::nullopt;
- }
- TSurface new_surface = GetUncachedSurface(gpu_addr, params);
- bool modified = false;
- for (auto& surface : overlaps) {
- const SurfaceParams& src_params = surface->GetSurfaceParams();
- if (src_params.target != SurfaceTarget::Texture2D) {
- failed = true;
- break;
- }
- if (src_params.height != params.height) {
- failed = true;
- break;
- }
- if (src_params.block_depth != params.block_depth ||
- src_params.block_height != params.block_height) {
- failed = true;
- break;
- }
- const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
- const auto offsets = params.GetBlockOffsetXYZ(offset);
- const auto z = std::get<2>(offsets);
- modified |= surface->IsModified();
- const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
- 1);
- ImageCopy(surface, new_surface, copy_params);
- }
- if (failed) {
- return std::nullopt;
- }
- for (const auto& surface : overlaps) {
- Unregister(surface);
- }
- new_surface->MarkAsModified(modified, Tick());
- Register(new_surface);
- auto view = new_surface->GetMainView();
- return {{std::move(new_surface), view}};
- } else {
+ if (params.target != SurfaceTarget::Texture3D) {
for (const auto& surface : overlaps) {
if (!surface->MatchTarget(params.target)) {
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -770,11 +748,60 @@ private:
continue;
}
if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
- return {{surface, surface->GetMainView()}};
+ return std::make_pair(surface, surface->GetMainView());
}
}
return InitializeSurface(gpu_addr, params, preserve_contents);
}
+
+ if (params.num_levels > 1) {
+ // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
+ return std::nullopt;
+ }
+
+ if (overlaps.size() == 1) {
+ const auto& surface = overlaps[0];
+ const SurfaceParams& overlap_params = surface->GetSurfaceParams();
+ // Don't attempt to render to textures with more than one level for now
+ // The texture has to be to the right or the sample address if we want to render to it
+ if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
+ const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ if (slice < overlap_params.depth) {
+ auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
+ return std::make_pair(std::move(surface), std::move(view));
+ }
+ }
+ }
+
+ TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+ bool modified = false;
+
+ for (auto& surface : overlaps) {
+ const SurfaceParams& src_params = surface->GetSurfaceParams();
+ if (src_params.target != SurfaceTarget::Texture2D ||
+ src_params.height != params.height ||
+ src_params.block_depth != params.block_depth ||
+ src_params.block_height != params.block_height) {
+ return std::nullopt;
+ }
+ modified |= surface->IsModified();
+
+ const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
+ const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
+ const u32 width = params.width;
+ const u32 height = params.height;
+ const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
+ TryCopyImage(surface, new_surface, copy_params);
+ }
+ for (const auto& surface : overlaps) {
+ Unregister(surface);
+ }
+ new_surface->MarkAsModified(modified, Tick());
+ Register(new_surface);
+
+ TView view = new_surface->GetMainView();
+ return std::make_pair(std::move(new_surface), std::move(view));
}
/**
@@ -810,7 +837,7 @@ private:
TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) {
- std::vector<TSurface> overlaps{current_surface};
+ VectorSurface overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
@@ -852,7 +879,7 @@ private:
}
}
- // Check if it's a 3D texture
+ // Manage 3D textures
if (params.block_depth > 0) {
auto surface =
Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
@@ -868,12 +895,9 @@ private:
// two things either the candidate surface is a supertexture of the overlap
// or they don't match in any known way.
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
- if (current_surface->GetGpuAddr() == gpu_addr) {
- std::optional<std::pair<TSurface, TView>> view =
- TryReconstructSurface(overlaps, params, gpu_addr);
- if (view) {
- return *view;
- }
+ const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
+ if (view) {
+ return *view;
}
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
MatchTopologyResult::FullMatch);
@@ -991,7 +1015,9 @@ private:
params.target = target;
params.is_tiled = false;
params.srgb_conversion = false;
- params.is_layered = false;
+ params.is_layered =
+ target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
+ target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
params.block_width = 0;
params.block_height = 0;
params.block_depth = 0;
@@ -1028,7 +1054,7 @@ private:
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
- auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
+ auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
if (deduced_src.Failed() || deduced_dst.Failed()) {
return;
}
@@ -1124,23 +1150,25 @@ private:
}
}
- std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
+ VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) {
return {};
}
const VAddr cpu_addr_end = cpu_addr + size;
- VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
- std::vector<TSurface> surfaces;
- while (start <= end) {
- std::vector<TSurface>& list = registry[start];
- for (auto& surface : list) {
- if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
- surface->MarkAsPicked(true);
- surfaces.push_back(surface);
+ VectorSurface surfaces;
+ for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
+ const auto it = registry.find(start);
+ if (it == registry.end()) {
+ continue;
+ }
+ for (auto& surface : it->second) {
+ if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
+ continue;
}
+ surface->MarkAsPicked(true);
+ surfaces.push_back(surface);
}
- start++;
}
for (auto& surface : surfaces) {
surface->MarkAsPicked(false);
@@ -1165,6 +1193,19 @@ private:
return {};
}
+ /// Try to do an image copy, logging an error when the formats are incompatible.
+ void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
+ const SurfaceParams& src_params = src->GetSurfaceParams();
+ const SurfaceParams& dst_params = dst->GetSurfaceParams();
+ if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
+ LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
+ static_cast<int>(dst_params.pixel_format),
+ static_cast<int>(src_params.pixel_format));
+ return;
+ }
+ ImageCopy(src, dst, copy);
+ }
+
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
return siblings_table[static_cast<std::size_t>(format)];
}
@@ -1214,6 +1255,7 @@ private:
VideoCore::RasterizerInterface& rasterizer;
FormatLookupTable format_lookup_table;
+ FormatCompatibility format_compatibility;
u64 ticks{};
diff --git a/src/web_service/CMakeLists.txt b/src/web_service/CMakeLists.txt
index 0c9bb0d55..06ab7c59d 100644
--- a/src/web_service/CMakeLists.txt
+++ b/src/web_service/CMakeLists.txt
@@ -8,4 +8,4 @@ add_library(web_service STATIC
)
create_target_directory_groups(web_service)
-target_link_libraries(web_service PRIVATE common json-headers httplib lurlparser)
+target_link_libraries(web_service PRIVATE common nlohmann_json::nlohmann_json httplib lurlparser)
diff --git a/src/web_service/telemetry_json.cpp b/src/web_service/telemetry_json.cpp
index 7538389bf..7a480e33c 100644
--- a/src/web_service/telemetry_json.cpp
+++ b/src/web_service/telemetry_json.cpp
@@ -2,7 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <json.hpp>
+#include <nlohmann/json.hpp>
#include "common/detached_tasks.h"
#include "common/web_result.h"
#include "web_service/telemetry_json.h"
diff --git a/src/web_service/verify_login.cpp b/src/web_service/verify_login.cpp
index ca4b43b93..bfaa5b70a 100644
--- a/src/web_service/verify_login.cpp
+++ b/src/web_service/verify_login.cpp
@@ -2,7 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <json.hpp>
+#include <nlohmann/json.hpp>
#include "common/web_result.h"
#include "web_service/verify_login.h"
#include "web_service/web_backend.h"
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 8b9404718..75c27e39e 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -208,6 +208,10 @@ if (MSVC)
copy_yuzu_unicorn_deps(yuzu)
endif()
+if (NOT APPLE)
+ target_compile_definitions(yuzu PRIVATE HAS_OPENGL)
+endif()
+
if (ENABLE_VULKAN)
target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include)
target_compile_definitions(yuzu PRIVATE HAS_VULKAN)
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 3d759f77b..4bfce48a4 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -8,13 +8,16 @@
#include <QHBoxLayout>
#include <QKeyEvent>
#include <QMessageBox>
-#include <QOffscreenSurface>
-#include <QOpenGLContext>
#include <QPainter>
#include <QScreen>
#include <QStringList>
#include <QWindow>
+#ifdef HAS_OPENGL
+#include <QOffscreenSurface>
+#include <QOpenGLContext>
+#endif
+
#if !defined(WIN32) && HAS_VULKAN
#include <qpa/qplatformnativeinterface.h>
#endif
@@ -41,49 +44,65 @@ EmuThread::EmuThread() = default;
EmuThread::~EmuThread() = default;
void EmuThread::run() {
- MicroProfileOnThreadCreate("EmuThread");
+ std::string name = "yuzu:EmuControlThread";
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+
+ auto& system = Core::System::GetInstance();
+
+ system.RegisterHostThread();
+
+ auto& gpu = system.GPU();
// Main process has been loaded. Make the context current to this thread and begin GPU and CPU
// execution.
- Core::System::GetInstance().GPU().Start();
+ gpu.Start();
+
+ gpu.ObtainContext();
emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
- Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
+ system.Renderer().Rasterizer().LoadDiskResources(
stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
emit LoadProgress(stage, value, total);
});
emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
+ gpu.ReleaseContext();
+
// Holds whether the cpu was running during the last iteration,
// so that the DebugModeLeft signal can be emitted before the
// next execution step
bool was_active = false;
while (!stop_run) {
if (running) {
- if (!was_active)
+ if (was_active) {
emit DebugModeLeft();
+ }
- Core::System::ResultStatus result = Core::System::GetInstance().RunLoop();
+ running_guard = true;
+ Core::System::ResultStatus result = system.Run();
+ if (result != Core::System::ResultStatus::Success) {
+ running_guard = false;
+ this->SetRunning(false);
+ emit ErrorThrown(result, system.GetStatusDetails());
+ }
+ running_wait.Wait();
+ result = system.Pause();
if (result != Core::System::ResultStatus::Success) {
+ running_guard = false;
this->SetRunning(false);
- emit ErrorThrown(result, Core::System::GetInstance().GetStatusDetails());
+ emit ErrorThrown(result, system.GetStatusDetails());
}
+ running_guard = false;
- was_active = running || exec_step;
- if (!was_active && !stop_run)
+ if (!stop_run) {
+ was_active = true;
emit DebugModeEntered();
+ }
} else if (exec_step) {
- if (!was_active)
- emit DebugModeLeft();
-
- exec_step = false;
- Core::System::GetInstance().SingleStep();
- emit DebugModeEntered();
- yieldCurrentThread();
-
- was_active = false;
+ UNIMPLEMENTED();
} else {
std::unique_lock lock{running_mutex};
running_cv.wait(lock, [this] { return IsRunning() || exec_step || stop_run; });
@@ -91,13 +110,14 @@ void EmuThread::run() {
}
// Shutdown the core emulation
- Core::System::GetInstance().Shutdown();
+ system.Shutdown();
#if MICROPROFILE_ENABLED
MicroProfileOnThreadExit();
#endif
}
+#ifdef HAS_OPENGL
class OpenGLSharedContext : public Core::Frontend::GraphicsContext {
public:
/// Create the original context that should be shared from
@@ -106,6 +126,9 @@ public:
format.setVersion(4, 3);
format.setProfile(QSurfaceFormat::CompatibilityProfile);
format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
+ if (Settings::values.renderer_debug) {
+ format.setOption(QSurfaceFormat::FormatOption::DebugContext);
+ }
// TODO: expose a setting for buffer value (ie default/single/double/triple)
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
format.setSwapInterval(0);
@@ -150,18 +173,19 @@ public:
}
void MakeCurrent() override {
- if (is_current) {
- return;
+ // We can't track the current state of the underlying context in this wrapper class because
+ // Qt may make the underlying context not current for one reason or another. In particular,
+ // the WebBrowser uses GL, so it seems to conflict if we aren't careful.
+ // Instead of always just making the context current (which does not have any caching to
+ // check if the underlying context is already current) we can check for the current context
+ // in the thread local data by calling `currentContext()` and checking if it's ours.
+ if (QOpenGLContext::currentContext() != context.get()) {
+ context->makeCurrent(surface);
}
- is_current = context->makeCurrent(surface);
}
void DoneCurrent() override {
- if (!is_current) {
- return;
- }
context->doneCurrent();
- is_current = false;
}
QOpenGLContext* GetShareContext() {
@@ -178,8 +202,8 @@ private:
std::unique_ptr<QOpenGLContext> context;
std::unique_ptr<QOffscreenSurface> offscreen_surface{};
QSurface* surface;
- bool is_current = false;
};
+#endif
class DummyContext : public Core::Frontend::GraphicsContext {};
@@ -352,7 +376,7 @@ QByteArray GRenderWindow::saveGeometry() {
}
qreal GRenderWindow::windowPixelRatio() const {
- return devicePixelRatio();
+ return devicePixelRatioF();
}
std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const {
@@ -470,6 +494,7 @@ void GRenderWindow::resizeEvent(QResizeEvent* event) {
}
std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
+#ifdef HAS_OPENGL
if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
auto c = static_cast<OpenGLSharedContext*>(main_context.get());
// Bind the shared contexts to the main surface in case the backend wants to take over
@@ -477,6 +502,7 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont
return std::make_unique<OpenGLSharedContext>(c->GetShareContext(),
child_widget->windowHandle());
}
+#endif
return std::make_unique<DummyContext>();
}
@@ -557,6 +583,7 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal
}
bool GRenderWindow::InitializeOpenGL() {
+#ifdef HAS_OPENGL
// TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
// WA_DontShowOnScreen, WA_DeleteOnClose
auto child = new OpenGLRenderWidget(this);
@@ -568,6 +595,11 @@ bool GRenderWindow::InitializeOpenGL() {
std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle()));
return true;
+#else
+ QMessageBox::warning(this, tr("OpenGL not available!"),
+ tr("yuzu has not been compiled with OpenGL support."));
+ return false;
+#endif
}
bool GRenderWindow::InitializeVulkan() {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 3626604ca..6c59b4d5c 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -59,6 +59,12 @@ public:
this->running = running;
lock.unlock();
running_cv.notify_all();
+ if (!running) {
+ running_wait.Set();
+ /// Wait until effectively paused
+ while (running_guard)
+ ;
+ }
}
/**
@@ -84,6 +90,8 @@ private:
std::atomic_bool stop_run{false};
std::mutex running_mutex;
std::condition_variable running_cv;
+ Common::Event running_wait{};
+ std::atomic_bool running_guard{false};
signals:
/**
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 75c6cf20b..bbbd96113 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -211,7 +211,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
// This must be in alphabetical order according to action name as it must have the same order as
// UISetting::values.shortcuts, which is alphabetically ordered.
// clang-format off
-const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{
+const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
{QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}},
{QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
{QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -222,6 +222,7 @@ const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{
{QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}},
{QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}},
{QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}},
+ {QStringLiteral("Mute Audio"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+M"), Qt::WindowShortcut}},
{QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
{QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
{QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
@@ -533,6 +534,8 @@ void Config::ReadDebuggingValues() {
Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
Settings::values.disable_cpu_opt =
ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool();
+ Settings::values.disable_macro_jit =
+ ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool();
qt_config->endGroup();
}
@@ -629,13 +632,11 @@ void Config::ReadRendererValues() {
static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
- Settings::values.resolution_factor =
- ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
Settings::values.use_frame_limit =
ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
- Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
+ Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
Settings::values.use_disk_shader_cache =
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -643,6 +644,8 @@ void Config::ReadRendererValues() {
Settings::values.use_asynchronous_gpu_emulation =
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
+ Settings::values.use_assembly_shaders =
+ ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
Settings::values.use_fast_gpu_time =
ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
Settings::values.force_30fps_mode =
@@ -687,6 +690,8 @@ void Config::ReadSystemValues() {
Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
+ Settings::values.time_zone_index = ReadSetting(QStringLiteral("time_zone_index"), 0).toInt();
+
const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
if (rng_seed_enabled) {
Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -716,8 +721,6 @@ void Config::ReadUIValues() {
.toString();
UISettings::values.enable_discord_presence =
ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
- UISettings::values.screenshot_resolution_factor =
- static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
UISettings::values.select_user_on_boot =
ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
@@ -1007,6 +1010,7 @@ void Config::SaveDebuggingValues() {
WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false);
+ WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false);
qt_config->endGroup();
}
@@ -1075,8 +1079,6 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
- WriteSetting(QStringLiteral("resolution_factor"),
- static_cast<double>(Settings::values.resolution_factor), 1.0);
WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1088,6 +1090,8 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
Settings::values.use_asynchronous_gpu_emulation, false);
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
+ WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
+ false);
WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
@@ -1126,6 +1130,7 @@ void Config::SaveSystemValues() {
WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
+ WriteSetting(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0);
WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
@@ -1149,8 +1154,6 @@ void Config::SaveUIValues() {
QString::fromUtf8(UISettings::themes[0].second));
WriteSetting(QStringLiteral("enable_discord_presence"),
UISettings::values.enable_discord_presence, true);
- WriteSetting(QStringLiteral("screenshot_resolution_factor"),
- UISettings::values.screenshot_resolution_factor, 0);
WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
false);
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 5cd2a5feb..09316382c 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -27,7 +27,7 @@ public:
default_mouse_buttons;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
- static const std::array<UISettings::Shortcut, 15> default_hotkeys;
+ static const std::array<UISettings::Shortcut, 16> default_hotkeys;
private:
void ReadValues();
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index c2026763e..2c77441fd 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -39,6 +39,8 @@ void ConfigureDebug::SetConfiguration() {
ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt);
ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn());
ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
+ ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+ ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit);
}
void ConfigureDebug::ApplyConfiguration() {
@@ -51,6 +53,7 @@ void ConfigureDebug::ApplyConfiguration() {
Settings::values.quest_flag = ui->quest_flag->isChecked();
Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked();
Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
+ Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
Debugger::ToggleConsole();
Log::Filter filter;
filter.ParseFilterString(Settings::values.log_filter);
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index e0d4c4a44..46f0208c6 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -148,6 +148,19 @@
</property>
</widget>
</item>
+ <item>
+ <widget class="QCheckBox" name="disable_macro_jit">
+ <property name="enabled">
+ <bool>true</bool>
+ </property>
+ <property name="whatsThis">
+ <string>When checked, it disables the macro Just In Time compiler. Enabled this makes games run slower</string>
+ </property>
+ <property name="text">
+ <string>Disable Macro JIT</string>
+ </property>
+ </widget>
+ </item>
</layout>
</widget>
</item>
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index cb95423e0..74b2ad537 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -23,6 +23,11 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
ConfigureGeneral::~ConfigureGeneral() = default;
void ConfigureGeneral::SetConfiguration() {
+ const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
+
+ ui->use_multi_core->setEnabled(runtime_lock);
+ ui->use_multi_core->setChecked(Settings::values.use_multi_core);
+
ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot);
ui->toggle_background_pause->setChecked(UISettings::values.pause_when_in_background);
@@ -41,6 +46,7 @@ void ConfigureGeneral::ApplyConfiguration() {
Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
Settings::values.frame_limit = ui->frame_limit->value();
+ Settings::values.use_multi_core = ui->use_multi_core->isChecked();
}
void ConfigureGeneral::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index fc3b7e65a..2711116a2 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -52,6 +52,13 @@
</layout>
</item>
<item>
+ <widget class="QCheckBox" name="use_multi_core">
+ <property name="text">
+ <string>Multicore CPU Emulation</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QCheckBox" name="toggle_check_exit">
<property name="text">
<string>Confirm exit while emulation is running</string>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#endif
-namespace {
-enum class Resolution : int {
- Auto,
- Scale1x,
- Scale2x,
- Scale3x,
- Scale4x,
-};
-
-float ToResolutionFactor(Resolution option) {
- switch (option) {
- case Resolution::Auto:
- return 0.f;
- case Resolution::Scale1x:
- return 1.f;
- case Resolution::Scale2x:
- return 2.f;
- case Resolution::Scale3x:
- return 3.f;
- case Resolution::Scale4x:
- return 4.f;
- }
- return 0.f;
-}
-
-Resolution FromResolutionFactor(float factor) {
- if (factor == 0.f) {
- return Resolution::Auto;
- } else if (factor == 1.f) {
- return Resolution::Scale1x;
- } else if (factor == 2.f) {
- return Resolution::Scale2x;
- } else if (factor == 3.f) {
- return Resolution::Scale3x;
- } else if (factor == 4.f) {
- return Resolution::Scale4x;
- }
- return Resolution::Auto;
-}
-} // Anonymous namespace
-
ConfigureGraphics::ConfigureGraphics(QWidget* parent)
: QWidget(parent), ui(new Ui::ConfigureGraphics) {
vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
ui->api->setEnabled(runtime_lock);
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
- ui->resolution_factor_combobox->setCurrentIndex(
- static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
void ConfigureGraphics::ApplyConfiguration() {
Settings::values.renderer_backend = GetCurrentGraphicsBackend();
Settings::values.vulkan_device = vulkan_device;
- Settings::values.resolution_factor =
- ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
</widget>
</item>
<item>
- <layout class="QHBoxLayout" name="horizontalLayout_2">
- <item>
- <widget class="QLabel" name="label">
- <property name="text">
- <string>Internal Resolution:</string>
- </property>
- </widget>
- </item>
- <item>
- <widget class="QComboBox" name="resolution_factor_combobox">
- <item>
- <property name="text">
- <string>Auto (Window Size)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>Native (1280x720)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>2x Native (2560x1440)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>3x Native (3840x2160)</string>
- </property>
- </item>
- <item>
- <property name="text">
- <string>4x Native (5120x2880)</string>
- </property>
- </item>
- </widget>
- </item>
- </layout>
- </item>
- <item>
<layout class="QHBoxLayout" name="horizontalLayout_6">
<item>
<widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 5bb2ae555..be5006ad3 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -22,6 +22,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
ui->use_vsync->setEnabled(runtime_lock);
ui->use_vsync->setChecked(Settings::values.use_vsync);
+ ui->use_assembly_shaders->setEnabled(runtime_lock);
+ ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
ui->force_30fps_mode->setEnabled(runtime_lock);
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
@@ -33,6 +35,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
Settings::values.gpu_accuracy = gpu_accuracy;
Settings::values.use_vsync = ui->use_vsync->isChecked();
+ Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 770b80c50..0021607ac 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -63,6 +63,16 @@
</widget>
</item>
<item>
+ <widget class="QCheckBox" name="use_assembly_shaders">
+ <property name="toolTip">
+ <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
+ </property>
+ <property name="text">
+ <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
+ </property>
+ </widget>
+ </item>
+ <item>
<widget class="QCheckBox" name="force_30fps_mode">
<property name="text">
<string>Force 30 FPS mode</string>
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index e4eb5594b..a05fa64ba 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -480,7 +480,9 @@ void ConfigureInputPlayer::RestoreDefaults() {
SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]);
}
}
+
UpdateButtonLabels();
+ ApplyConfiguration();
}
void ConfigureInputPlayer::ClearAll() {
@@ -505,6 +507,7 @@ void ConfigureInputPlayer::ClearAll() {
}
UpdateButtonLabels();
+ ApplyConfiguration();
}
void ConfigureInputPlayer::UpdateButtonLabels() {
diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp
index 06566e981..0de7a4f0b 100644
--- a/src/yuzu/configuration/configure_service.cpp
+++ b/src/yuzu/configuration/configure_service.cpp
@@ -68,6 +68,7 @@ void ConfigureService::SetConfiguration() {
}
std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
+#ifdef YUZU_ENABLE_BOXCAT
std::optional<std::string> global;
std::map<std::string, Service::BCAT::EventStatus> map;
const auto res = Service::BCAT::Boxcat::GetStatus(global, map);
@@ -105,7 +106,10 @@ std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
.arg(QString::fromStdString(key))
.arg(FormatEventStatusString(value));
}
- return {QStringLiteral("Current Boxcat Events"), std::move(out)};
+ return {tr("Current Boxcat Events"), std::move(out)};
+#else
+ return {tr("Current Boxcat Events"), tr("There are currently no events on boxcat.")};
+#endif
}
void ConfigureService::OnBCATImplChanged() {
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index f49cd4c8f..10315e7a6 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -57,6 +57,7 @@ void ConfigureSystem::SetConfiguration() {
ui->combo_language->setCurrentIndex(Settings::values.language_index);
ui->combo_region->setCurrentIndex(Settings::values.region_index);
+ ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index);
ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
@@ -84,6 +85,7 @@ void ConfigureSystem::ApplyConfiguration() {
Settings::values.language_index = ui->combo_language->currentIndex();
Settings::values.region_index = ui->combo_region->currentIndex();
+ Settings::values.time_zone_index = ui->combo_time_zone->currentIndex();
Settings::values.sound_index = ui->combo_sound->currentIndex();
if (ui->rng_seed_checkbox->isChecked()) {
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index d8fa2d2cc..26d42d5c5 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -37,5 +37,6 @@ private:
int language_index = 0;
int region_index = 0;
+ int time_zone_index = 0;
int sound_index = 0;
};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 4e2c7e76e..9c8cca6dc 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -22,14 +22,14 @@
<string>System Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout">
- <item row="2" column="0">
+ <item row="3" column="0">
<widget class="QLabel" name="label_sound">
<property name="text">
<string>Sound output mode</string>
</property>
</widget>
</item>
- <item row="3" column="0">
+ <item row="4" column="0">
<widget class="QLabel" name="label_console_id">
<property name="text">
<string>Console ID:</string>
@@ -174,14 +174,255 @@
</item>
</widget>
</item>
- <item row="5" column="0">
+ <item row="2" column="0">
+ <widget class="QLabel" name="label_timezone">
+ <property name="text">
+ <string>Time Zone:</string>
+ </property>
+ </widget>
+ </item>
+ <item row="2" column="1">
+ <widget class="QComboBox" name="combo_time_zone">
+ <item>
+ <property name="text">
+ <string>Auto</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Default</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>CET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>CST6CDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Cuba</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Egypt</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Eire</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>EST5EDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GB</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GB-Eire</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT+0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT-0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>GMT0</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Greenwich</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Hongkong</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>HST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Iceland</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Iran</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Israel</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Jamaica</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Japan</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Kwajalein</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Libya</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MST</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>MST7MDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Navajo</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>NZ</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>NZ-CHAT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Poland</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Portugal</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>PRC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>PST8PDT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>ROC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>ROK</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Singapore</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Turkey</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>UCT</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Universal</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>UTC</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>W-SU</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>WET</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>Zulu</string>
+ </property>
+ </item>
+ </widget>
+ </item>
+ <item row="6" column="0">
<widget class="QCheckBox" name="rng_seed_checkbox">
<property name="text">
<string>RNG Seed</string>
</property>
</widget>
</item>
- <item row="2" column="1">
+ <item row="3" column="1">
<widget class="QComboBox" name="combo_sound">
<item>
<property name="text">
@@ -207,7 +448,7 @@
</property>
</widget>
</item>
- <item row="3" column="1">
+ <item row="4" column="1">
<widget class="QPushButton" name="button_regenerate_console_id">
<property name="sizePolicy">
<sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -223,14 +464,14 @@
</property>
</widget>
</item>
- <item row="4" column="0">
+ <item row="5" column="0">
<widget class="QCheckBox" name="custom_rtc_checkbox">
<property name="text">
<string>Custom RTC</string>
</property>
</widget>
</item>
- <item row="4" column="1">
+ <item row="5" column="1">
<widget class="QDateTimeEdit" name="custom_rtc_edit">
<property name="minimumDate">
<date>
@@ -244,7 +485,7 @@
</property>
</widget>
</item>
- <item row="5" column="1">
+ <item row="6" column="1">
<widget class="QLineEdit" name="rng_seed_edit">
<property name="sizePolicy">
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index c1ea25fb8..9bb0a0109 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -2,10 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <fmt/format.h>
+
#include "yuzu/debugger/wait_tree.h"
#include "yuzu/util/util.h"
#include "common/assert.h"
+#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/hle/kernel/handle_table.h"
#include "core/hle/kernel/mutex.h"
@@ -59,8 +62,10 @@ std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList()
std::size_t row = 0;
auto add_threads = [&](const std::vector<std::shared_ptr<Kernel::Thread>>& threads) {
for (std::size_t i = 0; i < threads.size(); ++i) {
- item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
- item_list.back()->row = row;
+ if (!threads[i]->IsHLEThread()) {
+ item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
+ item_list.back()->row = row;
+ }
++row;
}
};
@@ -114,20 +119,21 @@ QString WaitTreeCallstack::GetText() const {
std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() const {
std::vector<std::unique_ptr<WaitTreeItem>> list;
- constexpr std::size_t BaseRegister = 29;
- auto& memory = Core::System::GetInstance().Memory();
- u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister];
+ if (thread.IsHLEThread()) {
+ return list;
+ }
- while (base_pointer != 0) {
- const u64 lr = memory.Read64(base_pointer + sizeof(u64));
- if (lr == 0) {
- break;
- }
+ if (thread.GetOwnerProcess() == nullptr || !thread.GetOwnerProcess()->Is64BitProcess()) {
+ return list;
+ }
- list.push_back(std::make_unique<WaitTreeText>(
- tr("0x%1").arg(lr - sizeof(u32), 16, 16, QLatin1Char{'0'})));
+ auto backtrace = Core::ARM_Interface::GetBacktraceFromContext(Core::System::GetInstance(),
+ thread.GetContext64());
- base_pointer = memory.Read64(base_pointer);
+ for (auto& entry : backtrace) {
+ std::string s = fmt::format("{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address,
+ entry.original_address, entry.offset, entry.name);
+ list.push_back(std::make_unique<WaitTreeText>(QString::fromStdString(s)));
}
return list;
@@ -206,7 +212,15 @@ QString WaitTreeThread::GetText() const {
status = tr("running");
break;
case Kernel::ThreadStatus::Ready:
- status = tr("ready");
+ if (!thread.IsPaused()) {
+ if (thread.WasRunning()) {
+ status = tr("running");
+ } else {
+ status = tr("ready");
+ }
+ } else {
+ status = tr("paused");
+ }
break;
case Kernel::ThreadStatus::Paused:
status = tr("paused");
@@ -254,7 +268,15 @@ QColor WaitTreeThread::GetColor() const {
case Kernel::ThreadStatus::Running:
return QColor(Qt::GlobalColor::darkGreen);
case Kernel::ThreadStatus::Ready:
- return QColor(Qt::GlobalColor::darkBlue);
+ if (!thread.IsPaused()) {
+ if (thread.WasRunning()) {
+ return QColor(Qt::GlobalColor::darkGreen);
+ } else {
+ return QColor(Qt::GlobalColor::darkBlue);
+ }
+ } else {
+ return QColor(Qt::GlobalColor::lightGray);
+ }
case Kernel::ThreadStatus::Paused:
return QColor(Qt::GlobalColor::lightGray);
case Kernel::ThreadStatus::WaitHLEEvent:
@@ -319,7 +341,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
if (thread.GetStatus() == Kernel::ThreadStatus::WaitSynch) {
list.push_back(std::make_unique<WaitTreeObjectList>(thread.GetSynchronizationObjects(),
- thread.IsSleepingOnWait()));
+ thread.IsWaitingSync()));
}
list.push_back(std::make_unique<WaitTreeCallstack>(thread));
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index ea0079353..a93733b26 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() {
// The number is the client ID for yuzu, it's used for images and the
// application name
- Discord_Initialize("471872241299226636", &handlers, 1, nullptr);
+ Discord_Initialize("712465656758665259", &handlers, 1, nullptr);
}
DiscordImpl::~DiscordImpl() {
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index dccbabcbf..bfb600df0 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -488,11 +488,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat
auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0);
- connect(open_save_location, &QAction::triggered, [this, program_id]() {
- emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData);
+ connect(open_save_location, &QAction::triggered, [this, program_id, path]() {
+ emit OpenFolderRequested(GameListOpenTarget::SaveData, path);
});
- connect(open_lfs_location, &QAction::triggered, [this, program_id]() {
- emit OpenFolderRequested(program_id, GameListOpenTarget::ModData);
+ connect(open_lfs_location, &QAction::triggered, [this, program_id, path]() {
+ emit OpenFolderRequested(GameListOpenTarget::ModData, path);
});
connect(open_transferable_shader_cache, &QAction::triggered,
[this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); });
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index 878d94413..a38cb2fc3 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -73,7 +73,7 @@ public:
signals:
void GameChosen(QString game_path);
void ShouldCancelWorker();
- void OpenFolderRequested(u64 program_id, GameListOpenTarget target);
+ void OpenFolderRequested(GameListOpenTarget target, const std::string& game_path);
void OpenTransferableShaderCacheRequested(u64 program_id);
void DumpRomFSRequested(u64 program_id, const std::string& game_path);
void CopyTIDRequested(u64 program_id);
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 2a6483370..ae842306c 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -19,6 +19,7 @@
#include <QTime>
#include <QtConcurrent/QtConcurrentRun>
#include "common/logging/log.h"
+#include "core/frontend/framebuffer_layout.h"
#include "core/loader/loader.h"
#include "ui_loading_screen.h"
#include "video_core/rasterizer_interface.h"
@@ -61,7 +62,7 @@ LoadingScreen::LoadingScreen(QWidget* parent)
: QWidget(parent), ui(std::make_unique<Ui::LoadingScreen>()),
previous_stage(VideoCore::LoadCallbackStage::Complete) {
ui->setupUi(this);
- setMinimumSize(1280, 720);
+ setMinimumSize(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
// Create a fade out effect to hide this loading screen widget.
// When fading opacity, it will fade to the parent widgets background color, which is why we
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 62f9d2ab2..f586950e7 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include <QShortcut>
#include <QStatusBar>
#include <QSysInfo>
+#include <QUrl>
#include <QtConcurrent/QtConcurrent>
#include <fmt/format.h>
@@ -65,6 +66,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
+#include "common/memory_detect.h"
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
@@ -216,9 +218,26 @@ GMainWindow::GMainWindow()
LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
Common::g_scm_desc);
#ifdef ARCHITECTURE_x86_64
- LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
+ const auto& caps = Common::GetCPUCaps();
+ std::string cpu_string = caps.cpu_string;
+ if (caps.avx || caps.avx2 || caps.avx512) {
+ cpu_string += " | AVX";
+ if (caps.avx512) {
+ cpu_string += "512";
+ } else if (caps.avx2) {
+ cpu_string += '2';
+ }
+ if (caps.fma || caps.fma4) {
+ cpu_string += " | FMA";
+ }
+ }
+ LOG_INFO(Frontend, "Host CPU: {}", cpu_string);
#endif
LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
+ LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
+ Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024);
+ LOG_INFO(Frontend, "Host Swap: {:.2f} GB",
+ Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024);
UpdateWindowTitle();
show();
@@ -515,14 +534,36 @@ void GMainWindow::InitializeWidgets() {
if (emulation_running) {
return;
}
- Settings::values.use_asynchronous_gpu_emulation =
- !Settings::values.use_asynchronous_gpu_emulation;
+ bool is_async =
+ !Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+ Settings::values.use_asynchronous_gpu_emulation = is_async;
async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
Settings::Apply();
});
async_status_button->setText(tr("ASYNC"));
async_status_button->setCheckable(true);
async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
+ // Setup Multicore button
+ multicore_status_button = new QPushButton();
+ multicore_status_button->setObjectName(QStringLiteral("TogglableStatusBarButton"));
+ multicore_status_button->setFocusPolicy(Qt::NoFocus);
+ connect(multicore_status_button, &QPushButton::clicked, [&] {
+ if (emulation_running) {
+ return;
+ }
+ Settings::values.use_multi_core = !Settings::values.use_multi_core;
+ bool is_async =
+ Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
+ Settings::values.use_asynchronous_gpu_emulation = is_async;
+ async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+ multicore_status_button->setChecked(Settings::values.use_multi_core);
+ Settings::Apply();
+ });
+ multicore_status_button->setText(tr("MULTICORE"));
+ multicore_status_button->setCheckable(true);
+ multicore_status_button->setChecked(Settings::values.use_multi_core);
+ statusBar()->insertPermanentWidget(0, multicore_status_button);
statusBar()->insertPermanentWidget(0, async_status_button);
// Setup Renderer API button
@@ -648,6 +689,11 @@ void GMainWindow::InitializeHotkeys() {
ui.action_Capture_Screenshot->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
+ ui.action_Fullscreen->setShortcut(
+ hotkey_registry.GetHotkey(main_window, fullscreen, this)->key());
+ ui.action_Fullscreen->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, fullscreen));
+
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
&QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
connect(
@@ -684,10 +730,7 @@ void GMainWindow::InitializeHotkeys() {
Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
UpdateStatusBar();
});
- // TODO: Remove this comment/static whenever the next major release of
- // MSVC occurs and we make it a requirement (see:
- // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
- static constexpr u16 SPEED_LIMIT_STEP = 5;
+ constexpr u16 SPEED_LIMIT_STEP = 5;
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
&QShortcut::activated, this, [&] {
if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -721,16 +764,19 @@ void GMainWindow::InitializeHotkeys() {
Settings::values.use_docked_mode);
dock_status_button->setChecked(Settings::values.use_docked_mode);
});
+ connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
+ &QShortcut::activated, this,
+ [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
}
void GMainWindow::SetDefaultUIGeometry() {
- // geometry: 55% of the window contents are in the upper screen half, 45% in the lower half
+ // geometry: 53% of the window contents are in the upper screen half, 47% in the lower half
const QRect screenRect = QApplication::desktop()->screenGeometry(this);
const int w = screenRect.width() * 2 / 3;
- const int h = screenRect.height() / 2;
+ const int h = screenRect.height() * 2 / 3;
const int x = (screenRect.x() + screenRect.width()) / 2 - w / 2;
- const int y = (screenRect.y() + screenRect.height()) / 2 - h * 55 / 100;
+ const int y = (screenRect.y() + screenRect.height()) / 2 - h * 53 / 100;
setGeometry(x, y, w, h);
}
@@ -821,6 +867,10 @@ void GMainWindow::ConnectMenuEvents() {
connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);
connect(ui.action_Report_Compatibility, &QAction::triggered, this,
&GMainWindow::OnMenuReportCompatibility);
+ connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage);
+ connect(ui.action_Open_Quickstart_Guide, &QAction::triggered, this,
+ &GMainWindow::OnOpenQuickstartGuide);
+ connect(ui.action_Open_FAQ, &QAction::triggered, this, &GMainWindow::OnOpenFAQ);
connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); });
connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure);
@@ -831,12 +881,9 @@ void GMainWindow::ConnectMenuEvents() {
&GMainWindow::OnDisplayTitleBars);
connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
+ connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize);
// Fullscreen
- ui.action_Fullscreen->setShortcut(
- hotkey_registry
- .GetHotkey(QStringLiteral("Main Window"), QStringLiteral("Fullscreen"), this)
- ->key());
connect(ui.action_Fullscreen, &QAction::triggered, this, &GMainWindow::ToggleFullscreen);
// Movie
@@ -904,6 +951,8 @@ bool GMainWindow::LoadROM(const QString& filename) {
nullptr, // E-Commerce
});
+ system.RegisterHostThread();
+
const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())};
const auto drd_callout =
@@ -1020,29 +1069,31 @@ void GMainWindow::BootGame(const QString& filename) {
}
status_bar_update_timer.start(2000);
async_status_button->setDisabled(true);
+ multicore_status_button->setDisabled(true);
renderer_status_button->setDisabled(true);
if (UISettings::values.hide_mouse) {
mouse_hide_timer.start();
setMouseTracking(true);
ui.centralwidget->setMouseTracking(true);
- ui.menubar->setMouseTracking(true);
}
const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
std::string title_name;
+ std::string title_version;
const auto res = Core::System::GetInstance().GetGameName(title_name);
- if (res != Loader::ResultStatus::Success) {
- const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata();
- if (metadata.first != nullptr)
- title_name = metadata.first->GetApplicationName();
- if (title_name.empty())
- title_name = FileUtil::GetFilename(filename.toStdString());
+ const auto metadata = FileSys::PatchManager(title_id).GetControlMetadata();
+ if (metadata.first != nullptr) {
+ title_version = metadata.first->GetVersionString();
+ title_name = metadata.first->GetApplicationName();
}
- LOG_INFO(Frontend, "Booting game: {:016X} | {}", title_id, title_name);
- UpdateWindowTitle(QString::fromStdString(title_name));
+ if (res != Loader::ResultStatus::Success || title_name.empty()) {
+ title_name = FileUtil::GetFilename(filename.toStdString());
+ }
+ LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version);
+ UpdateWindowTitle(title_name, title_version);
loading_screen->Prepare(Core::System::GetInstance().GetAppLoader());
loading_screen->show();
@@ -1099,7 +1150,6 @@ void GMainWindow::ShutdownGame() {
setMouseTracking(false);
ui.centralwidget->setMouseTracking(false);
- ui.menubar->setMouseTracking(false);
UpdateWindowTitle();
@@ -1109,6 +1159,7 @@ void GMainWindow::ShutdownGame() {
game_fps_label->setVisible(false);
emu_frametime_label->setVisible(false);
async_status_button->setEnabled(true);
+ multicore_status_button->setEnabled(true);
#ifdef HAS_VULKAN
renderer_status_button->setEnabled(true);
#endif
@@ -1156,39 +1207,61 @@ void GMainWindow::OnGameListLoadFile(QString game_path) {
BootGame(game_path);
}
-void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target) {
+void GMainWindow::OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path) {
std::string path;
QString open_target;
+
+ const auto v_file = Core::GetGameFileFromPath(vfs, game_path);
+ const auto loader = Loader::GetLoader(v_file);
+ FileSys::NACP control{};
+ u64 program_id{};
+
+ loader->ReadControlData(control);
+ loader->ReadProgramId(program_id);
+
+ const bool has_user_save{control.GetDefaultNormalSaveSize() > 0};
+ const bool has_device_save{control.GetDeviceSaveDataSize() > 0};
+
+ ASSERT_MSG(has_user_save != has_device_save, "Game uses both user and device savedata?");
+
switch (target) {
case GameListOpenTarget::SaveData: {
open_target = tr("Save Data");
const std::string nand_dir = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir);
ASSERT(program_id != 0);
- const auto select_profile = [this] {
- QtProfileSelectionDialog dialog(this);
- dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
- Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
- dialog.setWindowModality(Qt::WindowModal);
+ if (has_user_save) {
+ // User save data
+ const auto select_profile = [this] {
+ QtProfileSelectionDialog dialog(this);
+ dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
+ Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
+ dialog.setWindowModality(Qt::WindowModal);
- if (dialog.exec() == QDialog::Rejected) {
- return -1;
- }
+ if (dialog.exec() == QDialog::Rejected) {
+ return -1;
+ }
- return dialog.GetIndex();
- };
+ return dialog.GetIndex();
+ };
- const auto index = select_profile();
- if (index == -1) {
- return;
- }
+ const auto index = select_profile();
+ if (index == -1) {
+ return;
+ }
- Service::Account::ProfileManager manager;
- const auto user_id = manager.GetUser(static_cast<std::size_t>(index));
- ASSERT(user_id);
- path = nand_dir + FileSys::SaveDataFactory::GetFullPath(FileSys::SaveDataSpaceId::NandUser,
- FileSys::SaveDataType::SaveData,
- program_id, user_id->uuid, 0);
+ Service::Account::ProfileManager manager;
+ const auto user_id = manager.GetUser(static_cast<std::size_t>(index));
+ ASSERT(user_id);
+ path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
+ FileSys::SaveDataSpaceId::NandUser,
+ FileSys::SaveDataType::SaveData, program_id, user_id->uuid, 0);
+ } else {
+ // Device save data
+ path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
+ FileSys::SaveDataSpaceId::NandUser,
+ FileSys::SaveDataType::SaveData, program_id, {}, 0);
+ }
if (!FileUtil::Exists(path)) {
FileUtil::CreateFullPath(path);
@@ -1771,6 +1844,26 @@ void GMainWindow::OnMenuReportCompatibility() {
}
}
+void GMainWindow::OpenURL(const QUrl& url) {
+ const bool open = QDesktopServices::openUrl(url);
+ if (!open) {
+ QMessageBox::warning(this, tr("Error opening URL"),
+ tr("Unable to open the URL \"%1\".").arg(url.toString()));
+ }
+}
+
+void GMainWindow::OnOpenModsPage() {
+ OpenURL(QUrl(QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods")));
+}
+
+void GMainWindow::OnOpenQuickstartGuide() {
+ OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/help/quickstart/")));
+}
+
+void GMainWindow::OnOpenFAQ() {
+ OpenURL(QUrl(QStringLiteral("https://yuzu-emu.org/wiki/faq/")));
+}
+
void GMainWindow::ToggleFullscreen() {
if (!emulation_running) {
return;
@@ -1831,6 +1924,20 @@ void GMainWindow::ToggleWindowMode() {
}
}
+void GMainWindow::ResetWindowSize() {
+ const auto aspect_ratio = Layout::EmulationAspectRatio(
+ static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio),
+ static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width);
+ if (!ui.action_Single_Window_Mode->isChecked()) {
+ render_window->resize(Layout::ScreenUndocked::Height / aspect_ratio,
+ Layout::ScreenUndocked::Height);
+ } else {
+ resize(Layout::ScreenUndocked::Height / aspect_ratio,
+ Layout::ScreenUndocked::Height + menuBar()->height() +
+ (ui.action_Show_Status_Bar->isChecked() ? statusBar()->height() : 0));
+ }
+}
+
void GMainWindow::OnConfigure() {
const auto old_theme = UISettings::values.theme;
const bool old_discord_presence = UISettings::values.enable_discord_presence;
@@ -1861,16 +1968,18 @@ void GMainWindow::OnConfigure() {
if (UISettings::values.hide_mouse && emulation_running) {
setMouseTracking(true);
ui.centralwidget->setMouseTracking(true);
- ui.menubar->setMouseTracking(true);
mouse_hide_timer.start();
} else {
setMouseTracking(false);
ui.centralwidget->setMouseTracking(false);
- ui.menubar->setMouseTracking(false);
}
dock_status_button->setChecked(Settings::values.use_docked_mode);
+ multicore_status_button->setChecked(Settings::values.use_multi_core);
+ Settings::values.use_asynchronous_gpu_emulation =
+ Settings::values.use_asynchronous_gpu_emulation || Settings::values.use_multi_core;
async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation);
+
#ifdef HAS_VULKAN
renderer_status_button->setChecked(Settings::values.renderer_backend ==
Settings::RendererBackend::Vulkan);
@@ -1957,7 +2066,8 @@ void GMainWindow::OnCaptureScreenshot() {
OnStartGame();
}
-void GMainWindow::UpdateWindowTitle(const QString& title_name) {
+void GMainWindow::UpdateWindowTitle(const std::string& title_name,
+ const std::string& title_version) {
const auto full_name = std::string(Common::g_build_fullname);
const auto branch_name = std::string(Common::g_scm_branch);
const auto description = std::string(Common::g_scm_desc);
@@ -1966,7 +2076,7 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) {
const auto date =
QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd")).toStdString();
- if (title_name.isEmpty()) {
+ if (title_name.empty()) {
const auto fmt = std::string(Common::g_title_bar_format_idle);
setWindowTitle(QString::fromStdString(fmt::format(fmt.empty() ? "yuzu {0}| {1}-{2}" : fmt,
full_name, branch_name, description,
@@ -1974,8 +2084,8 @@ void GMainWindow::UpdateWindowTitle(const QString& title_name) {
} else {
const auto fmt = std::string(Common::g_title_bar_format_running);
setWindowTitle(QString::fromStdString(
- fmt::format(fmt.empty() ? "yuzu {0}| {3} | {1}-{2}" : fmt, full_name, branch_name,
- description, title_name.toStdString(), date, build_id)));
+ fmt::format(fmt.empty() ? "yuzu {0}| {3} | {6} | {1}-{2}" : fmt, full_name, branch_name,
+ description, title_name, date, build_id, title_version)));
}
}
@@ -1997,7 +2107,7 @@ void GMainWindow::UpdateStatusBar() {
game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0));
emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
- emu_speed_label->setVisible(true);
+ emu_speed_label->setVisible(!Settings::values.use_multi_core);
game_fps_label->setVisible(true);
emu_frametime_label->setVisible(true);
}
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 60b17c54a..66c84e5c0 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -181,9 +181,12 @@ private slots:
void OnPauseGame();
void OnStopGame();
void OnMenuReportCompatibility();
+ void OnOpenModsPage();
+ void OnOpenQuickstartGuide();
+ void OnOpenFAQ();
/// Called whenever a user selects a game in the game list widget.
void OnGameListLoadFile(QString game_path);
- void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target);
+ void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
void OnTransferableShaderCacheOpenFile(u64 program_id);
void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
void OnGameListCopyTID(u64 program_id);
@@ -208,16 +211,19 @@ private slots:
void ShowFullscreen();
void HideFullscreen();
void ToggleWindowMode();
+ void ResetWindowSize();
void OnCaptureScreenshot();
void OnCoreError(Core::System::ResultStatus, std::string);
void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
private:
std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
- void UpdateWindowTitle(const QString& title_name = {});
+ void UpdateWindowTitle(const std::string& title_name = {},
+ const std::string& title_version = {});
void UpdateStatusBar();
void HideMouseCursor();
void ShowMouseCursor();
+ void OpenURL(const QUrl& url);
Ui::MainWindow ui;
@@ -233,6 +239,7 @@ private:
QLabel* game_fps_label = nullptr;
QLabel* emu_frametime_label = nullptr;
QPushButton* async_status_button = nullptr;
+ QPushButton* multicore_status_button = nullptr;
QPushButton* renderer_status_button = nullptr;
QPushButton* dock_status_button = nullptr;
QTimer status_bar_update_timer;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index ae414241e..bee6e107e 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
- <width>1081</width>
- <height>730</height>
+ <width>1280</width>
+ <height>720</height>
</rect>
</property>
<property name="windowTitle">
@@ -44,7 +44,7 @@
<rect>
<x>0</x>
<y>0</y>
- <width>1081</width>
+ <width>1280</width>
<height>21</height>
</rect>
</property>
@@ -96,6 +96,7 @@
<addaction name="action_Display_Dock_Widget_Headers"/>
<addaction name="action_Show_Filter_Bar"/>
<addaction name="action_Show_Status_Bar"/>
+ <addaction name="action_Reset_Window_Size"/>
<addaction name="separator"/>
<addaction name="menu_View_Debugging"/>
</widget>
@@ -112,6 +113,9 @@
<string>&amp;Help</string>
</property>
<addaction name="action_Report_Compatibility"/>
+ <addaction name="action_Open_Mods_Page"/>
+ <addaction name="action_Open_Quickstart_Guide"/>
+ <addaction name="action_Open_FAQ"/>
<addaction name="separator"/>
<addaction name="action_About"/>
</widget>
@@ -215,6 +219,11 @@
<string>Show Status Bar</string>
</property>
</action>
+ <action name="action_Reset_Window_Size">
+ <property name="text">
+ <string>Reset Window Size</string>
+ </property>
+ </action>
<action name="action_Fullscreen">
<property name="checkable">
<bool>true</bool>
@@ -250,6 +259,21 @@
<bool>false</bool>
</property>
</action>
+ <action name="action_Open_Mods_Page">
+ <property name="text">
+ <string>Open Mods Page</string>
+ </property>
+ </action>
+ <action name="action_Open_Quickstart_Guide">
+ <property name="text">
+ <string>Open Quickstart Guide</string>
+ </property>
+ </action>
+ <action name="action_Open_FAQ">
+ <property name="text">
+ <string>FAQ</string>
+ </property>
+ </action>
<action name="action_Open_yuzu_Folder">
<property name="text">
<string>Open yuzu Folder</string>
diff --git a/src/yuzu/yuzu.rc b/src/yuzu/yuzu.rc
index 1b253653f..4a3645a71 100644
--- a/src/yuzu/yuzu.rc
+++ b/src/yuzu/yuzu.rc
@@ -16,4 +16,4 @@ IDI_ICON1 ICON "../../dist/yuzu.ico"
// RT_MANIFEST
//
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 8476a5a16..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -367,6 +367,9 @@ void Config::ReadValues() {
Settings::values.custom_rtc = std::nullopt;
}
+ Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
+ Settings::values.time_zone_index = sdl2_config->GetInteger("System", "time_zone_index", 0);
+
// Core
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
@@ -377,8 +380,6 @@ void Config::ReadValues() {
Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
- Settings::values.resolution_factor =
- static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
Settings::values.aspect_ratio =
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.max_anisotropy =
@@ -394,6 +395,8 @@ void Config::ReadValues() {
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
Settings::values.use_vsync =
static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
+ Settings::values.use_assembly_shaders =
+ sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
Settings::values.use_fast_gpu_time =
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);
@@ -409,8 +412,6 @@ void Config::ReadValues() {
Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
- Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
-
// Miscellaneous
Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
@@ -429,6 +430,8 @@ void Config::ReadValues() {
Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
Settings::values.disable_cpu_opt =
sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false);
+ Settings::values.disable_macro_jit =
+ sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false);
const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
std::stringstream ss(title_list);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 60b1a62fa..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_shader_jit =
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
# Aspect ratio
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
aspect_ratio =
@@ -134,6 +129,10 @@ max_anisotropy =
# 0 (default): Off, 1: On
use_vsync =
+# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
+# 0 (default): Off, 1: On
+use_assembly_shaders =
+
# Turns on the frame limiter, which will limit frames output to the target game speed
# 0: Off, 1: On (default)
use_frame_limit =
@@ -262,6 +261,10 @@ language_index =
# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
region_value =
+# The system time zone that yuzu will use during emulation
+# 0: Auto-select (default), 1: Default (system archive value), Others: Index for specified time zone
+time_zone_index =
+
[Miscellaneous]
# A filter which removes logs below a certain logging level.
# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
@@ -283,6 +286,8 @@ quest_flag =
# Determines whether or not JIT CPU optimizations are enabled
# false: Optimizations Enabled, true: Optimizations Disabled
disable_cpu_opt =
+# Enables/Disables the macro JIT compiler
+disable_macro_jit=false
[WebService]
# Whether or not to enable telemetry
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
+ if (Settings::values.renderer_debug) {
+ SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
+ }
SDL_GL_SetSwapInterval(0);
std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 4d2ea7e9e..e6c6a839d 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
#include <iostream>
#include <memory>
#include <string>
@@ -236,9 +237,11 @@ int main(int argc, char** argv) {
system.Renderer().Rasterizer().LoadDiskResources();
std::thread render_thread([&emu_window] { emu_window->Present(); });
+ system.Run();
while (emu_window->IsOpen()) {
- system.RunLoop();
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
+ system.Pause();
render_thread.join();
system.Shutdown();
diff --git a/src/yuzu_cmd/yuzu.rc b/src/yuzu_cmd/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_cmd/yuzu.rc
+++ b/src/yuzu_cmd/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
// RT_MANIFEST
//
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
// Renderer
- Settings::values.resolution_factor =
- static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
Settings::values.aspect_ratio =
static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
# 0: Interpreter (slow), 1 (default): JIT (fast)
use_shader_jit =
-# Resolution scale factor
-# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
-# factor for the Switch resolution
-resolution_factor =
-
# Aspect ratio
# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
aspect_ratio =
diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp
index 85d3f436b..2d3f6e3a7 100644
--- a/src/yuzu_tester/service/yuzutest.cpp
+++ b/src/yuzu_tester/service/yuzutest.cpp
@@ -53,7 +53,7 @@ private:
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(RESULT_SUCCESS);
- rb.Push<u32>(write_size);
+ rb.Push<u32>(static_cast<u32>(write_size));
}
void StartIndividual(Kernel::HLERequestContext& ctx) {
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 676e70ebd..083667baf 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
#include <iostream>
#include <memory>
#include <string>
@@ -255,9 +256,11 @@ int main(int argc, char** argv) {
system.GPU().Start();
system.Renderer().Rasterizer().LoadDiskResources();
+ system.Run();
while (!finished) {
- system.RunLoop();
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
+ system.Pause();
detached_tasks.WaitForAllTasks();
return return_value;
diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_tester/yuzu.rc
+++ b/src/yuzu_tester/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
// RT_MANIFEST
//
-1 RT_MANIFEST "../../dist/yuzu.manifest"
+0 RT_MANIFEST "../../dist/yuzu.manifest"