diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/audio_core/audio_core.cpp | 19 | ||||
-rw-r--r-- | src/audio_core/audio_core.h | 13 | ||||
-rw-r--r-- | src/audio_core/hle/dsp.cpp | 14 | ||||
-rw-r--r-- | src/audio_core/hle/dsp.h | 21 | ||||
-rw-r--r-- | src/core/core.cpp | 5 | ||||
-rw-r--r-- | src/core/hle/kernel/memory.cpp | 112 | ||||
-rw-r--r-- | src/core/hle/kernel/memory.h | 10 | ||||
-rw-r--r-- | src/core/hle/kernel/process.cpp | 23 | ||||
-rw-r--r-- | src/core/hle/kernel/process.h | 2 | ||||
-rw-r--r-- | src/core/memory.cpp | 8 | ||||
-rw-r--r-- | src/core/memory.h | 10 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 212 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 7 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.h | 14 |
17 files changed, 328 insertions, 150 deletions
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp index 84f9c03a7..9c2e6ed88 100644 --- a/src/audio_core/audio_core.cpp +++ b/src/audio_core/audio_core.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <memory> #include <string> #include "audio_core/audio_core.h" @@ -10,8 +11,8 @@ #include "audio_core/null_sink.h" #include "audio_core/sink.h" #include "audio_core/sink_details.h" +#include "common/common_types.h" #include "core/core_timing.h" -#include "core/hle/kernel/vm_manager.h" #include "core/hle/service/dsp_dsp.h" namespace AudioCore { @@ -39,20 +40,8 @@ void Init() { CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); } -void AddAddressSpace(Kernel::VMManager& address_space) { - auto r0_vma = address_space - .MapBackingMemory(DSP::HLE::region0_base, - reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), - sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO) - .MoveFrom(); - address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); - - auto r1_vma = address_space - .MapBackingMemory(DSP::HLE::region1_base, - reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), - sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO) - .MoveFrom(); - address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); +std::array<u8, Memory::DSP_RAM_SIZE>& GetDspMemory() { + return DSP::HLE::g_dsp_memory.raw_memory; } void SelectSink(std::string sink_id) { diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h index 0edf6dd15..ab323ce1f 100644 --- a/src/audio_core/audio_core.h +++ b/src/audio_core/audio_core.h @@ -4,11 +4,10 @@ #pragma once +#include <array> #include <string> - -namespace Kernel { -class VMManager; -} +#include "common/common_types.h" +#include "core/memory.h" namespace AudioCore { @@ -17,8 +16,8 @@ constexpr int native_sample_rate = 32728; ///< 32kHz /// Initialise Audio Core void Init(); -/// Add DSP address spaces to a Process. -void AddAddressSpace(Kernel::VMManager& vm_manager); +/// Returns a reference to the array backing DSP memory +std::array<u8, Memory::DSP_RAM_SIZE>& GetDspMemory(); /// Select the sink to use based on sink id. void SelectSink(std::string sink_id); @@ -29,4 +28,4 @@ void EnableStretching(bool enable); /// Shutdown Audio Core void Shutdown(); -} // namespace +} // namespace AudioCore diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp index 31421fdc6..260b182ed 100644 --- a/src/audio_core/hle/dsp.cpp +++ b/src/audio_core/hle/dsp.cpp @@ -16,31 +16,33 @@ namespace HLE { // Region management -std::array<SharedMemory, 2> g_regions; +DspMemory g_dsp_memory; static size_t CurrentRegionIndex() { // The region with the higher frame counter is chosen unless there is wraparound. // This function only returns a 0 or 1. + u16 frame_counter_0 = g_dsp_memory.region_0.frame_counter; + u16 frame_counter_1 = g_dsp_memory.region_1.frame_counter; - if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) { + if (frame_counter_0 == 0xFFFFu && frame_counter_1 != 0xFFFEu) { // Wraparound has occurred. return 1; } - if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) { + if (frame_counter_1 == 0xFFFFu && frame_counter_0 != 0xFFFEu) { // Wraparound has occurred. return 0; } - return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1; + return (frame_counter_0 > frame_counter_1) ? 0 : 1; } static SharedMemory& ReadRegion() { - return g_regions[CurrentRegionIndex()]; + return CurrentRegionIndex() == 0 ? g_dsp_memory.region_0 : g_dsp_memory.region_1; } static SharedMemory& WriteRegion() { - return g_regions[1 - CurrentRegionIndex()]; + return CurrentRegionIndex() != 0 ? g_dsp_memory.region_0 : g_dsp_memory.region_1; } // Audio processing and mixing diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h index 0a0f60ac1..94ce48863 100644 --- a/src/audio_core/hle/dsp.h +++ b/src/audio_core/hle/dsp.h @@ -31,8 +31,8 @@ namespace HLE { // double-buffer. The frame counter is located as the very last u16 of each region and is // incremented each audio tick. -constexpr VAddr region0_base = 0x1FF50000; -constexpr VAddr region1_base = 0x1FF70000; +constexpr u32 region0_offset = 0x50000; +constexpr u32 region1_offset = 0x70000; /** * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from @@ -512,7 +512,22 @@ struct SharedMemory { }; ASSERT_DSP_STRUCT(SharedMemory, 0x8000); -extern std::array<SharedMemory, 2> g_regions; +union DspMemory { + std::array<u8, 0x80000> raw_memory; + struct { + u8 unused_0[0x50000]; + SharedMemory region_0; + u8 unused_1[0x18000]; + SharedMemory region_1; + u8 unused_2[0x8000]; + }; +}; +static_assert(offsetof(DspMemory, region_0) == region0_offset, + "DSP region 0 is at the wrong offset"); +static_assert(offsetof(DspMemory, region_1) == region1_offset, + "DSP region 1 is at the wrong offset"); + +extern DspMemory g_dsp_memory; // Structures must have an offset that is a multiple of two. static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, diff --git a/src/core/core.cpp b/src/core/core.cpp index 140ff6451..881f1e93c 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -13,11 +13,11 @@ #include "core/core_timing.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/memory.h" #include "core/hle/kernel/thread.h" #include "core/hle/service/service.h" #include "core/hw/hw.h" #include "core/loader/loader.h" +#include "core/memory_setup.h" #include "core/settings.h" #include "video_core/video_core.h" @@ -123,7 +123,8 @@ void System::Reschedule() { } System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) { - Memory::Init(); + Memory::InitMemoryMap(); + LOG_DEBUG(HW_Memory, "initialized OK"); if (Settings::values.use_cpu_jit) { cpu_core = std::make_unique<ARM_Dynarmic>(USER32MODE); diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 33c165197..8250a90b5 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cinttypes> #include <map> #include <memory> #include <utility> #include <vector> #include "audio_core/audio_core.h" +#include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "core/hle/config_mem.h" @@ -92,52 +94,96 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) { UNREACHABLE(); } } -} - -namespace Memory { -namespace { +std::array<u8, Memory::VRAM_SIZE> vram; +std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram; + +void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping) { + using namespace Memory; + + struct MemoryArea { + VAddr vaddr_base; + PAddr paddr_base; + u32 size; + }; + + // The order of entries in this array is important. The VRAM and IO VAddr ranges overlap, and + // VRAM must be tried first. + static constexpr MemoryArea memory_areas[] = { + {VRAM_VADDR, VRAM_PADDR, VRAM_SIZE}, + {IO_AREA_VADDR, IO_AREA_PADDR, IO_AREA_SIZE}, + {DSP_RAM_VADDR, DSP_RAM_PADDR, DSP_RAM_SIZE}, + {N3DS_EXTRA_RAM_VADDR, N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE - 0x20000}, + }; + + VAddr mapping_limit = mapping.address + mapping.size; + if (mapping_limit < mapping.address) { + LOG_CRITICAL(Loader, "Mapping size overflowed: address=0x%08" PRIX32 " size=0x%" PRIX32, + mapping.address, mapping.size); + return; + } -struct MemoryArea { - u32 base; - u32 size; - const char* name; -}; + auto area = + std::find_if(std::begin(memory_areas), std::end(memory_areas), [&](const auto& area) { + return mapping.address >= area.vaddr_base && + mapping_limit <= area.vaddr_base + area.size; + }); + if (area == std::end(memory_areas)) { + LOG_ERROR(Loader, "Unhandled special mapping: address=0x%08" PRIX32 " size=0x%" PRIX32 + " read_only=%d unk_flag=%d", + mapping.address, mapping.size, mapping.read_only, mapping.unk_flag); + return; + } -// We don't declare the IO regions in here since its handled by other means. -static MemoryArea memory_areas[] = { - {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) -}; -} + u32 offset_into_region = mapping.address - area->vaddr_base; + if (area->paddr_base == IO_AREA_PADDR) { + LOG_ERROR(Loader, "MMIO mappings are not supported yet. phys_addr=0x%08" PRIX32, + area->paddr_base + offset_into_region); + return; + } -void Init() { - InitMemoryMap(); - LOG_DEBUG(HW_Memory, "initialized OK"); -} + // TODO(yuriks): Use GetPhysicalPointer when that becomes independent of the virtual + // mappings. + u8* target_pointer = nullptr; + switch (area->paddr_base) { + case VRAM_PADDR: + target_pointer = vram.data(); + break; + case DSP_RAM_PADDR: + target_pointer = AudioCore::GetDspMemory().data(); + break; + case N3DS_EXTRA_RAM_PADDR: + target_pointer = n3ds_extra_ram.data(); + break; + default: + UNREACHABLE(); + } -void InitLegacyAddressSpace(Kernel::VMManager& address_space) { - using namespace Kernel; + // TODO(yuriks): This flag seems to have some other effect, but it's unknown what + MemoryState memory_state = mapping.unk_flag ? MemoryState::Static : MemoryState::IO; - for (MemoryArea& area : memory_areas) { - auto block = std::make_shared<std::vector<u8>>(area.size); - address_space - .MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private) - .Unwrap(); - } + auto vma = address_space + .MapBackingMemory(mapping.address, target_pointer + offset_into_region, + mapping.size, memory_state) + .MoveFrom(); + address_space.Reprotect(vma, + mapping.read_only ? VMAPermission::Read : VMAPermission::ReadWrite); +} +void MapSharedPages(VMManager& address_space) { auto cfg_mem_vma = address_space - .MapBackingMemory(CONFIG_MEMORY_VADDR, (u8*)&ConfigMem::config_mem, - CONFIG_MEMORY_SIZE, MemoryState::Shared) + .MapBackingMemory(Memory::CONFIG_MEMORY_VADDR, + reinterpret_cast<u8*>(&ConfigMem::config_mem), + Memory::CONFIG_MEMORY_SIZE, MemoryState::Shared) .MoveFrom(); address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); auto shared_page_vma = address_space - .MapBackingMemory(SHARED_PAGE_VADDR, (u8*)&SharedPage::shared_page, - SHARED_PAGE_SIZE, MemoryState::Shared) + .MapBackingMemory(Memory::SHARED_PAGE_VADDR, + reinterpret_cast<u8*>(&SharedPage::shared_page), + Memory::SHARED_PAGE_SIZE, MemoryState::Shared) .MoveFrom(); address_space.Reprotect(shared_page_vma, VMAPermission::Read); - - AudioCore::AddAddressSpace(address_space); } -} // namespace +} // namespace Kernel diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h index 4e1856a41..08c1a9989 100644 --- a/src/core/hle/kernel/memory.h +++ b/src/core/hle/kernel/memory.h @@ -23,11 +23,7 @@ struct MemoryRegionInfo { void MemoryInit(u32 mem_type); void MemoryShutdown(); MemoryRegionInfo* GetMemoryRegion(MemoryRegion region); -} -namespace Memory { - -void Init(); -void InitLegacyAddressSpace(Kernel::VMManager& address_space); - -} // namespace +void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping); +void MapSharedPages(VMManager& address_space); +} // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index ba80fe7f8..32cb25fb7 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -35,7 +35,6 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) { process->codeset = std::move(code_set); process->flags.raw = 0; process->flags.memory_region.Assign(MemoryRegion::APPLICATION); - Memory::InitLegacyAddressSpace(process->vm_manager); return process; } @@ -78,8 +77,15 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { AddressMapping mapping; mapping.address = descriptor << 12; - mapping.size = (end_desc << 12) - mapping.address; - mapping.writable = (descriptor & (1 << 20)) != 0; + VAddr end_address = end_desc << 12; + + if (mapping.address < end_address) { + mapping.size = end_address - mapping.address; + } else { + mapping.size = 0; + } + + mapping.read_only = (descriptor & (1 << 20)) != 0; mapping.unk_flag = (end_desc & (1 << 20)) != 0; address_mappings.push_back(mapping); @@ -88,8 +94,10 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { AddressMapping mapping; mapping.address = descriptor << 12; mapping.size = Memory::PAGE_SIZE; - mapping.writable = true; // TODO: Not sure if correct + mapping.read_only = false; mapping.unk_flag = false; + + address_mappings.push_back(mapping); } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF // Kernel version kernel_version = descriptor & 0xFFFF; @@ -131,6 +139,12 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) { misc_memory_used += stack_size; memory_region->used += stack_size; + // Map special address mappings + MapSharedPages(vm_manager); + for (const auto& mapping : address_mappings) { + HandleSpecialMapping(vm_manager, mapping); + } + vm_manager.LogLayout(Log::Level::Debug); Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); } @@ -138,6 +152,7 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) { VAddr Process::GetLinearHeapAreaAddress() const { return kernel_version < 0x22C ? Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_VADDR; } + VAddr Process::GetLinearHeapBase() const { return GetLinearHeapAreaAddress() + memory_region->base; } diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index b566950b0..b52211d2a 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -20,7 +20,7 @@ struct AddressMapping { // Address and size must be page-aligned VAddr address; u32 size; - bool writable; + bool read_only; bool unk_flag; }; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 65e4bba85..b8438e490 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -672,12 +672,14 @@ PAddr VirtualToPhysicalAddress(const VAddr addr) { return addr - VRAM_VADDR + VRAM_PADDR; } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; + } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { + return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR; } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { return addr - IO_AREA_VADDR + IO_AREA_PADDR; - } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { - return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR; + } else if (addr >= N3DS_EXTRA_RAM_VADDR && addr < N3DS_EXTRA_RAM_VADDR_END) { + return addr - N3DS_EXTRA_RAM_VADDR + N3DS_EXTRA_RAM_PADDR; } LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr); @@ -696,6 +698,8 @@ VAddr PhysicalToVirtualAddress(const PAddr addr) { return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { return addr - IO_AREA_PADDR + IO_AREA_VADDR; + } else if (addr >= N3DS_EXTRA_RAM_PADDR && addr < N3DS_EXTRA_RAM_PADDR_END) { + return addr - N3DS_EXTRA_RAM_PADDR + N3DS_EXTRA_RAM_VADDR; } LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr); diff --git a/src/core/memory.h b/src/core/memory.h index 903b58a22..802aa465e 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -37,6 +37,12 @@ enum : PAddr { VRAM_SIZE = 0x00600000, ///< VRAM size (6MB) VRAM_PADDR_END = VRAM_PADDR + VRAM_SIZE, + /// New 3DS additional memory. Supposedly faster than regular FCRAM. Part of it can be used by + /// applications and system modules if mapped via the ExHeader. + N3DS_EXTRA_RAM_PADDR = 0x1F000000, + N3DS_EXTRA_RAM_SIZE = 0x00400000, ///< New 3DS additional memory size (4MB) + N3DS_EXTRA_RAM_PADDR_END = N3DS_EXTRA_RAM_PADDR + N3DS_EXTRA_RAM_SIZE, + /// DSP memory DSP_RAM_PADDR = 0x1FF00000, DSP_RAM_SIZE = 0x00080000, ///< DSP memory size (512KB) @@ -81,6 +87,10 @@ enum : VAddr { LINEAR_HEAP_SIZE = 0x08000000, LINEAR_HEAP_VADDR_END = LINEAR_HEAP_VADDR + LINEAR_HEAP_SIZE, + /// Maps 1:1 to New 3DS additional memory + N3DS_EXTRA_RAM_VADDR = 0x1E800000, + N3DS_EXTRA_RAM_VADDR_END = N3DS_EXTRA_RAM_VADDR + N3DS_EXTRA_RAM_SIZE, + /// Maps 1:1 to the IO register area. IO_AREA_VADDR = 0x1EC00000, IO_AREA_VADDR_END = IO_AREA_VADDR + IO_AREA_SIZE, diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2e32ff905..9a09f81dc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -32,12 +32,13 @@ namespace Pica { namespace CommandProcessor { -static int float_regs_counter = 0; +static int vs_float_regs_counter = 0; +static u32 vs_uniform_write_buffer[4]; -static u32 uniform_write_buffer[4]; +static int gs_float_regs_counter = 0; +static u32 gs_uniform_write_buffer[4]; static int default_attr_counter = 0; - static u32 default_attr_write_buffer[3]; // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF @@ -48,6 +49,97 @@ static const u32 expand_bits_to_bytes[] = { MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); +static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) { + if (&setup == &g_state.vs) { + return "vertex shader"; + } + if (&setup == &g_state.gs) { + return "geometry shader"; + } + return "unknown shader"; +} + +static void WriteUniformBoolReg(Shader::ShaderSetup& setup, u32 value) { + for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) + setup.uniforms.b[i] = (value & (1 << i)) != 0; +} + +static void WriteUniformIntReg(Shader::ShaderSetup& setup, unsigned index, + const Math::Vec4<u8>& values) { + ASSERT(index < setup.uniforms.i.size()); + setup.uniforms.i[index] = values; + LOG_TRACE(HW_GPU, "Set %s integer uniform %d to %02x %02x %02x %02x", + GetShaderSetupTypeName(setup), index, values.x, values.y, values.z, values.w); +} + +static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, + int& float_regs_counter, u32 uniform_write_buffer[4], u32 value) { + auto& uniform_setup = config.uniform_setup; + + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + uniform_write_buffer[float_regs_counter++] = value; + + // Uniforms are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || + (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { + float_regs_counter = 0; + + auto& uniform = setup.uniforms.f[uniform_setup.index]; + + if (uniform_setup.index >= 96) { + LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", GetShaderSetupTypeName(setup), + (int)uniform_setup.index); + } else { + + // NOTE: The destination component order indeed is "backwards" + if (uniform_setup.IsFloat32()) { + for (auto i : {0, 1, 2, 3}) + uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); + } else { + // TODO: Untested + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | + ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | + ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); + } + + LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", + GetShaderSetupTypeName(setup), (int)uniform_setup.index, + uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), + uniform.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + uniform_setup.index.Assign(uniform_setup.index + 1); + } + } +} + +static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, + unsigned max_program_code_length, u32 value) { + if (config.program.offset >= max_program_code_length) { + LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup), + (int)config.program.offset); + } else { + setup.program_code[config.program.offset] = value; + config.program.offset++; + } +} + +static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { + if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), + (int)config.swizzle_patterns.offset); + } else { + setup.swizzle_data[config.swizzle_patterns.offset] = value; + config.swizzle_patterns.offset++; + } +} + static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -330,21 +422,70 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } - case PICA_REG_INDEX(vs.bool_uniforms): - for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; + case PICA_REG_INDEX(gs.bool_uniforms): + WriteUniformBoolReg(g_state.gs, value); + break; + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[1], 0x282): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[2], 0x283): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[3], 0x284): { + unsigned index = (id - PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281)); + auto values = regs.gs.int_uniforms[index]; + WriteUniformIntReg(g_state.gs, index, + Math::Vec4<u8>(values.x, values.y, values.z, values.w)); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[0], 0x291): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[1], 0x292): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[2], 0x293): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[3], 0x294): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[4], 0x295): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[5], 0x296): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[6], 0x297): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[7], 0x298): { + WriteUniformFloatReg(g_state.regs.gs, g_state.gs, gs_float_regs_counter, + gs_uniform_write_buffer, value); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[0], 0x29c): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[1], 0x29d): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[2], 0x29e): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[3], 0x29f): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[4], 0x2a0): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { + WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[0], 0x2a6): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[1], 0x2a7): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[2], 0x2a8): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[3], 0x2a9): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[4], 0x2aa): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { + WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value); + break; + } + + case PICA_REG_INDEX(vs.bool_uniforms): + WriteUniformBoolReg(g_state.vs, value); break; case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); auto values = regs.vs.int_uniforms[index]; - g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); - LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), - values.y.Value(), values.z.Value(), values.w.Value()); + WriteUniformIntReg(g_state.vs, index, + Math::Vec4<u8>(values.x, values.y, values.z, values.w)); break; } @@ -356,51 +497,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = regs.vs.uniform_setup; - - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - uniform_write_buffer[float_regs_counter++] = value; - - // Uniforms are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || - (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { - float_regs_counter = 0; - - auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; - - if (uniform_setup.index > 95) { - LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); - break; - } - - // NOTE: The destination component order indeed is "backwards" - if (uniform_setup.IsFloat32()) { - for (auto i : {0, 1, 2, 3}) - uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); - } else { - // TODO: Untested - uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | - ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | - ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); - } - - LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, - uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), - uniform.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - uniform_setup.index.Assign(uniform_setup.index + 1); - } + WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, + vs_uniform_write_buffer, value); break; } - // Load shader program code case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): @@ -409,12 +510,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - g_state.vs.program_code[regs.vs.program.offset] = value; - regs.vs.program.offset++; + WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); break; } - // Load swizzle pattern data case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): @@ -423,8 +522,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; - regs.vs.swizzle_patterns.offset++; + WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); break; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 38ea717ab..e156f6aef 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -24,6 +24,9 @@ namespace Pica { namespace Shader { +constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096; +constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096; + struct AttributeBuffer { alignas(16) Math::Vec4<float24> attr[16]; }; @@ -144,8 +147,8 @@ struct ShaderSetup { return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); } - std::array<u32, 1024> program_code; - std::array<u32, 1024> swizzle_data; + std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; + std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; /// Data private to ShaderEngines struct EngineData { diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f4d1c46c5..aa1cec81f 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -653,7 +653,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { - ASSERT(entry_point < 1024); + ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; } diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 0ee0dd9ef..73c21871c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -15,7 +15,7 @@ JitX64Engine::JitX64Engine() = default; JitX64Engine::~JitX64Engine() = default; void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { - ASSERT(entry_point < 1024); + ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 2dbc8b147..5d9b6448c 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -834,8 +834,8 @@ void JitShader::FindReturnOffsets() { std::sort(return_offsets.begin(), return_offsets.end()); } -void JitShader::Compile(const std::array<u32, 1024>* program_code_, - const std::array<u32, 1024>* swizzle_data_) { +void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code_, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data_) { program_code = program_code_; swizzle_data = swizzle_data_; diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index f27675560..31af0ca48 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -22,8 +22,8 @@ namespace Pica { namespace Shader { -/// Memory allocated for each compiled shader (64Kb) -constexpr size_t MAX_SHADER_SIZE = 1024 * 64; +/// Memory allocated for each compiled shader +constexpr size_t MAX_SHADER_SIZE = MAX_PROGRAM_CODE_LENGTH * 64; /** * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 @@ -37,8 +37,8 @@ public: program(&setup, &state, instruction_labels[offset].getAddress()); } - void Compile(const std::array<u32, 1024>* program_code, - const std::array<u32, 1024>* swizzle_data); + void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data); void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); @@ -104,11 +104,11 @@ private: */ void FindReturnOffsets(); - const std::array<u32, 1024>* program_code = nullptr; - const std::array<u32, 1024>* swizzle_data = nullptr; + const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr; + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr; /// Mapping of Pica VS instructions to pointers in the emitted code - std::array<Xbyak::Label, 1024> instruction_labels; + std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels; /// Offsets in code where a return needs to be inserted std::vector<unsigned> return_offsets; |