diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/hle/kernel/shared_memory.cpp | 27 |
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 6 |
-rw-r--r-- | src/core/hle/kernel/thread.h | 6 |
-rw-r--r-- | src/core/hle/service/apt/apt.cpp | 73 |
-rw-r--r-- | src/core/hle/service/apt/apt_s.cpp | 4 |
-rw-r--r-- | src/core/hle/service/nim/nim.cpp | 18 |
-rw-r--r-- | src/core/hle/service/nim/nim.h | 11 |
-rw-r--r-- | src/core/hle/service/nim/nim_u.cpp | 2 |
-rw-r--r-- | src/core/memory.cpp | 67 |
-rw-r--r-- | src/core/memory.h | 4 |
-rw-r--r-- | src/video_core/command_processor.cpp | 439 |
11 files changed, 371 insertions, 286 deletions
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index 02d5a7a36..d45daca35 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp @@ -55,22 +55,19 @@ SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); } } else { - // TODO(Subv): What happens if an application tries to create multiple memory blocks - // pointing to the same address? auto& vm_manager = shared_memory->owner_process->vm_manager; // The memory is already available and mapped in the owner process. - auto vma = vm_manager.FindVMA(address)->second; - // Copy it over to our own storage - shared_memory->backing_block = std::make_shared<std::vector<u8>>( - vma.backing_block->data() + vma.offset, vma.backing_block->data() + vma.offset + size); - shared_memory->backing_block_offset = 0; - // Unmap the existing pages - vm_manager.UnmapRange(address, size); - // Map our own block into the address space - vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size, - MemoryState::Shared); - // Reprotect the block with the new permissions - vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions)); + auto vma = vm_manager.FindVMA(address); + ASSERT_MSG(vma != vm_manager.vma_map.end(), "Invalid memory address"); + ASSERT_MSG(vma->second.backing_block, "Backing block doesn't exist for address"); + + // The returned VMA might be a bigger one encompassing the desired address. 
+ auto vma_offset = address - vma->first; + ASSERT_MSG(vma_offset + size <= vma->second.size, + "Shared memory exceeds bounds of mapped block"); + + shared_memory->backing_block = vma->second.backing_block; + shared_memory->backing_block_offset = vma->second.offset + vma_offset; } shared_memory->base_address = address; @@ -184,4 +181,4 @@ u8* SharedMemory::GetPointer(u32 offset) { return backing_block->data() + backing_block_offset + offset; } -} // namespace +} // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 2614a260c..0f7970ebe 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -541,6 +541,12 @@ s32 Thread::GetWaitObjectIndex(WaitObject* object) const { return static_cast<s32>(std::distance(match, wait_objects.rend()) - 1); } +VAddr Thread::GetCommandBufferAddress() const { + // Offset from the start of TLS at which the IPC command buffer begins. + static constexpr int CommandHeaderOffset = 0x80; + return GetTLSAddress() + CommandHeaderOffset; +} + //////////////////////////////////////////////////////////////////////////////////////////////////// void ThreadingInit() { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 4679c2022..314fba81f 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -164,6 +164,12 @@ public: return tls_address; } + /** + * Returns the address of the current thread's command buffer, located in the TLS. + * @returns VAddr of the thread's command buffer. 
+ */ + VAddr GetCommandBufferAddress() const; + /** * Returns whether this thread is waiting for all the objects in * its wait list to become ready, as a result of a WaitSynchronizationN call diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 2f7362748..59ea9823d 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp @@ -171,7 +171,11 @@ void SendParameter(const MessageParameter& parameter) { next_parameter = parameter; // Signal the event to let the receiver know that a new parameter is ready to be read auto* const slot_data = GetAppletSlotData(static_cast<AppletId>(parameter.destination_id)); - ASSERT(slot_data); + if (slot_data == nullptr) { + LOG_DEBUG(Service_APT, "No applet was registered with the id %03X", + parameter.destination_id); + return; + } slot_data->parameter_event->Signal(); } @@ -505,9 +509,6 @@ void SendParameter(Service::Interface* self) { size_t size; VAddr buffer = rp.PopStaticBuffer(&size); - std::shared_ptr<HLE::Applets::Applet> dest_applet = - HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id)); - LOG_DEBUG(Service_APT, "called src_app_id=0x%08X, dst_app_id=0x%08X, signal_type=0x%08X," "buffer_size=0x%08X, handle=0x%08X, size=0x%08zX, in_param_buffer_ptr=0x%08X", @@ -522,12 +523,6 @@ void SendParameter(Service::Interface* self) { return; } - if (dest_applet == nullptr) { - LOG_ERROR(Service_APT, "Unknown applet id=0x%08X", dst_app_id); - rb.Push<u32>(-1); // TODO(Subv): Find the right error code - return; - } - MessageParameter param; param.destination_id = dst_app_id; param.sender_id = src_app_id; @@ -536,7 +531,14 @@ void SendParameter(Service::Interface* self) { param.buffer.resize(buffer_size); Memory::ReadBlock(buffer, param.buffer.data(), param.buffer.size()); - rb.Push(dest_applet->ReceiveParameter(param)); + SendParameter(param); + + // If the applet is running in HLE mode, use the HLE interface to communicate with it. 
+ if (auto dest_applet = HLE::Applets::Applet::Get(static_cast<AppletId>(dst_app_id))) { + rb.Push(dest_applet->ReceiveParameter(param)); + } else { + rb.Push(RESULT_SUCCESS); + } } void ReceiveParameter(Service::Interface* self) { @@ -765,7 +767,12 @@ void PrepareToStartLibraryApplet(Service::Interface* self) { IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x18, 1, 0); // 0x180040 AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>()); + LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); + IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); + + // TODO(Subv): Launch the requested applet application. + auto applet = HLE::Applets::Applet::Get(applet_id); if (applet) { LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id); @@ -773,7 +780,6 @@ void PrepareToStartLibraryApplet(Service::Interface* self) { } else { rb.Push(HLE::Applets::Applet::Create(applet_id)); } - LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); } void PrepareToStartNewestHomeMenu(Service::Interface* self) { @@ -794,7 +800,12 @@ void PreloadLibraryApplet(Service::Interface* self) { IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x16, 1, 0); // 0x160040 AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>()); + LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); + IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); + + // TODO(Subv): Launch the requested applet application. 
+ auto applet = HLE::Applets::Applet::Get(applet_id); if (applet) { LOG_WARNING(Service_APT, "applet has already been started id=%08X", applet_id); @@ -802,34 +813,40 @@ void PreloadLibraryApplet(Service::Interface* self) { } else { rb.Push(HLE::Applets::Applet::Create(applet_id)); } - LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); } void StartLibraryApplet(Service::Interface* self) { IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x1E, 2, 4); // 0x1E0084 AppletId applet_id = static_cast<AppletId>(rp.Pop<u32>()); - std::shared_ptr<HLE::Applets::Applet> applet = HLE::Applets::Applet::Get(applet_id); - - LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); - - if (applet == nullptr) { - LOG_ERROR(Service_APT, "unknown applet id=%08X", applet_id); - IPC::RequestBuilder rb = rp.MakeBuilder(1, 0, false); - rb.Push<u32>(-1); // TODO(Subv): Find the right error code - return; - } size_t buffer_size = rp.Pop<u32>(); Kernel::Handle handle = rp.PopHandle(); VAddr buffer_addr = rp.PopStaticBuffer(); - AppletStartupParameter parameter; - parameter.object = Kernel::g_handle_table.GetGeneric(handle); - parameter.buffer.resize(buffer_size); - Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size()); + LOG_DEBUG(Service_APT, "called applet_id=%08X", applet_id); IPC::RequestBuilder rb = rp.MakeBuilder(1, 0); - rb.Push(applet->Start(parameter)); + + // Send the Wakeup signal to the applet + MessageParameter param; + param.destination_id = static_cast<u32>(applet_id); + param.sender_id = static_cast<u32>(AppletId::Application); + param.object = Kernel::g_handle_table.GetGeneric(handle); + param.signal = static_cast<u32>(SignalType::Wakeup); + param.buffer.resize(buffer_size); + Memory::ReadBlock(buffer_addr, param.buffer.data(), param.buffer.size()); + SendParameter(param); + + // In case the applet is being HLEd, attempt to communicate with it. 
+ if (auto applet = HLE::Applets::Applet::Get(applet_id)) { + AppletStartupParameter parameter; + parameter.object = Kernel::g_handle_table.GetGeneric(handle); + parameter.buffer.resize(buffer_size); + Memory::ReadBlock(buffer_addr, parameter.buffer.data(), parameter.buffer.size()); + rb.Push(applet->Start(parameter)); + } else { + rb.Push(RESULT_SUCCESS); + } } void CancelLibraryApplet(Service::Interface* self) { diff --git a/src/core/hle/service/apt/apt_s.cpp b/src/core/hle/service/apt/apt_s.cpp index fe1d21fff..bb78ee7d7 100644 --- a/src/core/hle/service/apt/apt_s.cpp +++ b/src/core/hle/service/apt/apt_s.cpp @@ -20,7 +20,7 @@ const Interface::FunctionInfo FunctionTable[] = { {0x00090040, IsRegistered, "IsRegistered"}, {0x000A0040, nullptr, "GetAttribute"}, {0x000B0040, InquireNotification, "InquireNotification"}, - {0x000C0104, nullptr, "SendParameter"}, + {0x000C0104, SendParameter, "SendParameter"}, {0x000D0080, ReceiveParameter, "ReceiveParameter"}, {0x000E0080, GlanceParameter, "GlanceParameter"}, {0x000F0100, nullptr, "CancelParameter"}, @@ -38,7 +38,7 @@ const Interface::FunctionInfo FunctionTable[] = { {0x001B00C4, nullptr, "StartApplication"}, {0x001C0000, nullptr, "WakeupApplication"}, {0x001D0000, nullptr, "CancelApplication"}, - {0x001E0084, nullptr, "StartLibraryApplet"}, + {0x001E0084, StartLibraryApplet, "StartLibraryApplet"}, {0x001F0084, nullptr, "StartSystemApplet"}, {0x00200044, nullptr, "StartNewestHomeMenu"}, {0x00210000, nullptr, "OrderToCloseApplication"}, diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp index d5624fe54..b10d5852b 100644 --- a/src/core/hle/service/nim/nim.cpp +++ b/src/core/hle/service/nim/nim.cpp @@ -5,6 +5,8 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/hle/ipc.h" +#include "core/hle/ipc_helpers.h" +#include "core/hle/kernel/event.h" #include "core/hle/service/nim/nim.h" #include "core/hle/service/nim/nim_aoc.h" #include "core/hle/service/nim/nim_s.h" @@ 
-14,6 +16,16 @@ namespace Service { namespace NIM { +static Kernel::SharedPtr<Kernel::Event> nim_system_update_event; + +void CheckForSysUpdateEvent(Service::Interface* self) { + IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x5, 0, 0); // 0x50000 + IPC::RequestBuilder rb = rp.MakeBuilder(1, 2); + rb.Push(RESULT_SUCCESS); + rb.PushCopyHandles(Kernel::g_handle_table.Create(nim_system_update_event).Unwrap()); + LOG_TRACE(Service_NIM, "called"); +} + void CheckSysUpdateAvailable(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); @@ -29,9 +41,13 @@ void Init() { AddService(new NIM_AOC_Interface); AddService(new NIM_S_Interface); AddService(new NIM_U_Interface); + + nim_system_update_event = Kernel::Event::Create(ResetType::OneShot, "NIM System Update Event"); } -void Shutdown() {} +void Shutdown() { + nim_system_update_event = nullptr; +} } // namespace NIM diff --git a/src/core/hle/service/nim/nim.h b/src/core/hle/service/nim/nim.h index c3106f18b..dbf605e5a 100644 --- a/src/core/hle/service/nim/nim.h +++ b/src/core/hle/service/nim/nim.h @@ -11,6 +11,17 @@ class Interface; namespace NIM { /** + * NIM::CheckForSysUpdateEvent service function + * Inputs: + * 1 : None + * Outputs: + * 1 : Result of function, 0 on success, otherwise error code + * 2 : Copy handle descriptor + * 3 : System Update event handle + */ +void CheckForSysUpdateEvent(Service::Interface* self); + +/** * NIM::CheckSysUpdateAvailable service function * Inputs: * 1 : None diff --git a/src/core/hle/service/nim/nim_u.cpp b/src/core/hle/service/nim/nim_u.cpp index 7664bad60..569660278 100644 --- a/src/core/hle/service/nim/nim_u.cpp +++ b/src/core/hle/service/nim/nim_u.cpp @@ -12,7 +12,7 @@ const Interface::FunctionInfo FunctionTable[] = { {0x00010000, nullptr, "StartSysUpdate"}, {0x00020000, nullptr, "GetUpdateDownloadProgress"}, {0x00040000, nullptr, "FinishTitlesInstall"}, - {0x00050000, nullptr, "CheckForSysUpdateEvent"}, + {0x00050000, CheckForSysUpdateEvent, 
"CheckForSysUpdateEvent"}, {0x00090000, CheckSysUpdateAvailable, "CheckSysUpdateAvailable"}, {0x000A0000, nullptr, "GetState"}, {0x000B0000, nullptr, "GetSystemTitleHash"}, diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 847e69710..7f58be6de 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -82,10 +82,10 @@ void UnmapRegion(PageTable& page_table, VAddr base, u32 size) { * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) * using a VMA from the current process */ -static u8* GetPointerFromVMA(VAddr vaddr) { +static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) { u8* direct_pointer = nullptr; - auto& vm_manager = Kernel::g_current_process->vm_manager; + auto& vm_manager = process.vm_manager; auto it = vm_manager.FindVMA(vaddr); ASSERT(it != vm_manager.vma_map.end()); @@ -108,6 +108,14 @@ static u8* GetPointerFromVMA(VAddr vaddr) { } /** + * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) + * using a VMA from the current process. + */ +static u8* GetPointerFromVMA(VAddr vaddr) { + return GetPointerFromVMA(*Kernel::g_current_process, vaddr); +} + +/** * This function should only be called for virtual addreses with attribute `PageType::Special`. 
*/ static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) { @@ -470,7 +478,10 @@ u64 Read64(const VAddr addr) { return Read<u64_le>(addr); } -void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) { +void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, + const size_t size) { + auto& page_table = process.vm_manager.page_table; + size_t remaining_size = size; size_t page_index = src_addr >> PAGE_BITS; size_t page_offset = src_addr & PAGE_MASK; @@ -479,7 +490,7 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) { const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { + switch (page_table.attributes[page_index]) { case PageType::Unmapped: { LOG_ERROR(HW_Memory, "unmapped ReadBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, src_addr, size); @@ -487,29 +498,30 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) { break; } case PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); + DEBUG_ASSERT(page_table.pointers[page_index]); - const u8* src_ptr = current_page_table->pointers[page_index] + page_offset; + const u8* src_ptr = page_table.pointers[page_index] + page_offset; std::memcpy(dest_buffer, src_ptr, copy_amount); break; } case PageType::Special: { - DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); - - GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount); + MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); + DEBUG_ASSERT(handler); + handler->ReadBlock(current_vaddr, dest_buffer, copy_amount); break; } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), FlushMode::Flush); - std::memcpy(dest_buffer, 
GetPointerFromVMA(current_vaddr), copy_amount); + std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); break; } case PageType::RasterizerCachedSpecial: { - DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); + MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); + DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), FlushMode::Flush); - GetMMIOHandler(current_vaddr)->ReadBlock(current_vaddr, dest_buffer, copy_amount); + handler->ReadBlock(current_vaddr, dest_buffer, copy_amount); break; } default: @@ -523,6 +535,10 @@ void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) { } } +void ReadBlock(const VAddr src_addr, void* dest_buffer, const size_t size) { + ReadBlock(*Kernel::g_current_process, src_addr, dest_buffer, size); +} + void Write8(const VAddr addr, const u8 data) { Write<u8>(addr, data); } @@ -539,7 +555,9 @@ void Write64(const VAddr addr, const u64 data) { Write<u64_le>(addr, data); } -void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) { +void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, + const size_t size) { + auto& page_table = process.vm_manager.page_table; size_t remaining_size = size; size_t page_index = dest_addr >> PAGE_BITS; size_t page_offset = dest_addr & PAGE_MASK; @@ -548,7 +566,7 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size const size_t copy_amount = std::min(PAGE_SIZE - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { + switch (page_table.attributes[page_index]) { case PageType::Unmapped: { LOG_ERROR(HW_Memory, "unmapped WriteBlock @ 0x%08X (start address = 0x%08X, size = %zu)", @@ -556,29 +574,30 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size break; } case 
PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); + DEBUG_ASSERT(page_table.pointers[page_index]); - u8* dest_ptr = current_page_table->pointers[page_index] + page_offset; + u8* dest_ptr = page_table.pointers[page_index] + page_offset; std::memcpy(dest_ptr, src_buffer, copy_amount); break; } case PageType::Special: { - DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); - - GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount); + MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); + DEBUG_ASSERT(handler); + handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } case PageType::RasterizerCachedMemory: { RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), FlushMode::FlushAndInvalidate); - std::memcpy(GetPointerFromVMA(current_vaddr), src_buffer, copy_amount); + std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); break; } case PageType::RasterizerCachedSpecial: { - DEBUG_ASSERT(GetMMIOHandler(current_vaddr)); + MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr); + DEBUG_ASSERT(handler); RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), FlushMode::FlushAndInvalidate); - GetMMIOHandler(current_vaddr)->WriteBlock(current_vaddr, src_buffer, copy_amount); + handler->WriteBlock(current_vaddr, src_buffer, copy_amount); break; } default: @@ -592,6 +611,10 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size } } +void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size) { + WriteBlock(*Kernel::g_current_process, dest_addr, src_buffer, size); +} + void ZeroBlock(const VAddr dest_addr, const size_t size) { size_t remaining_size = size; size_t page_index = dest_addr >> PAGE_BITS; diff --git a/src/core/memory.h b/src/core/memory.h index 347c08c78..dd599f73e 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -205,7 +205,11 @@ void Write16(VAddr 
addr, u16 data); void Write32(VAddr addr, u32 data); void Write64(VAddr addr, u64 data); +void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, + size_t size); void ReadBlock(const VAddr src_addr, void* dest_buffer, size_t size); +void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, + size_t size); void WriteBlock(const VAddr dest_addr, const void* src_buffer, size_t size); void ZeroBlock(const VAddr dest_addr, const size_t size); void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size); diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3ab4af374..caf9f7a06 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, } } +static void LoadDefaultVertexAttributes(u32 register_value) { + auto& regs = g_state.regs; + + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + default_attr_write_buffer[default_attr_counter++] = register_value; + + // Default attributes are written in a packed format such that four float24 values are encoded + // in three 32-bit numbers. + // We write to internal memory once a full such vector is written. 
+ if (default_attr_counter >= 3) { + default_attr_counter = 0; + + auto& setup = regs.pipeline.vs_default_attributes_setup; + + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + return; + } + + Math::Vec4<float24> attribute; + + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | + ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | + ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); + + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + if (setup.index < 15) { + g_state.input_default_attributes.attr[setup.index] = attribute; + setup.index++; + } else { + // Put each attribute into an immediate input buffer. When all specified immediate + // attributes are present, the Vertex Shader is invoked and everything is sent to + // the primitive assembler. 
+ + auto& immediate_input = g_state.immediate.input_vertex; + auto& immediate_attribute_id = g_state.immediate.current_attribute; + + immediate_input.attr[immediate_attribute_id] = attribute; + + if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { + immediate_attribute_id += 1; + } else { + MICROPROFILE_SCOPE(GPU_Drawing); + immediate_attribute_id = 0; + + auto* shader_engine = Shader::GetEngine(); + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + static_cast<void*>(&immediate_input)); + Shader::UnitState shader_unit; + Shader::AttributeBuffer output{}; + + shader_unit.LoadInput(regs.vs, immediate_input); + shader_engine->Run(g_state.vs, shader_unit); + shader_unit.WriteOutput(regs.vs, output); + + // Send to geometry pipeline + if (g_state.immediate.reset_geometry_pipeline) { + g_state.geometry_pipeline.Reconfigure(); + g_state.immediate.reset_geometry_pipeline = false; + } + ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); + g_state.geometry_pipeline.Setup(shader_engine); + g_state.geometry_pipeline.SubmitVertex(output); + + // TODO: If drawing after every immediate mode triangle kills performance, + // change it to flush triangles whenever a drawing config register changes + // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } + } + } + } +} + +static void Draw(u32 command_id) { + MICROPROFILE_SCOPE(GPU_Drawing); + auto& regs = g_state.regs; + +#if PICA_LOG_TEV + DebugUtils::DumpTevStageConfig(regs.GetTevStages()); +#endif + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); + + // Processes information about internal vertex attributes to figure out how a vertex is + 
// loaded. + // Later, these can be compiled and cached. + const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); + VertexLoader loader(regs.pipeline); + + // Load vertices + bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); + + const auto& index_info = regs.pipeline.index_array; + const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); + const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); + bool index_u16 = index_info.format != 0; + + PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; + + if (g_debug_context && g_debug_context->recorder) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.texturing.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + g_debug_context->recorder->MemoryAccessed( + texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * + texture.config.width / 2 * texture.config.height, + texture.config.GetPhysicalAddress()); + } + } + + DebugUtils::MemoryAccessTracker memory_accesses; + + // Simple circular-replacement vertex cache + // The size has been tuned for optimal balance between hit-rate and the cost of lookup + const size_t VERTEX_CACHE_SIZE = 32; + std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; + std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; + Shader::AttributeBuffer vs_output; + + unsigned int vertex_cache_pos = 0; + vertex_cache_ids.fill(-1); + + auto* shader_engine = Shader::GetEngine(); + Shader::UnitState shader_unit; + + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); + + g_state.geometry_pipeline.Reconfigure(); + g_state.geometry_pipeline.Setup(shader_engine); + if (g_state.geometry_pipeline.NeedIndexInput()) + ASSERT(is_indexed); + + for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { + // Indexed 
rendering doesn't use the start offset + unsigned int vertex = is_indexed + ? (index_u16 ? index_address_16[index] : index_address_8[index]) + : (index + regs.pipeline.vertex_offset); + + // -1 is a common special value used for primitive restart. Since it's unknown if + // the PICA supports it, and it would mess up the caching, guard against it here. + ASSERT(vertex != -1); + + bool vertex_cache_hit = false; + + if (is_indexed) { + if (g_state.geometry_pipeline.NeedIndexInput()) { + g_state.geometry_pipeline.SubmitIndex(vertex); + continue; + } + + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + } + + for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { + if (vertex == vertex_cache_ids[i]) { + vs_output = vertex_cache[i]; + vertex_cache_hit = true; + break; + } + } + } + + if (!vertex_cache_hit) { + // Initialize data for the current vertex + Shader::AttributeBuffer input; + loader.LoadVertex(base_address, index, vertex, input, memory_accesses); + + // Send to vertex shader + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, + (void*)&input); + shader_unit.LoadInput(regs.vs, input); + shader_engine->Run(g_state.vs, shader_unit); + shader_unit.WriteOutput(regs.vs, vs_output); + + if (is_indexed) { + vertex_cache[vertex_cache_pos] = vs_output; + vertex_cache_ids[vertex_cache_pos] = vertex; + vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; + } + } + + // Send to geometry pipeline + g_state.geometry_pipeline.SubmitVertex(vs_output); + } + + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); + } + + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + if (g_debug_context) { + g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } +} + static void 
WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -168,95 +386,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Load default vertex input attributes case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233): case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234): - case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - default_attr_write_buffer[default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are - // encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (default_attr_counter >= 3) { - default_attr_counter = 0; - - auto& setup = regs.pipeline.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); - break; - } - - Math::Vec4<float24> attribute; - - // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | - ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | - ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); - - LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! 
- if (setup.index < 15) { - g_state.input_default_attributes.attr[setup.index] = attribute; - setup.index++; - } else { - // Put each attribute into an immediate input buffer. When all specified immediate - // attributes are present, the Vertex Shader is invoked and everything is sent to - // the primitive assembler. - - auto& immediate_input = g_state.immediate.input_vertex; - auto& immediate_attribute_id = g_state.immediate.current_attribute; - - immediate_input.attr[immediate_attribute_id] = attribute; - - if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) { - immediate_attribute_id += 1; - } else { - MICROPROFILE_SCOPE(GPU_Drawing); - immediate_attribute_id = 0; - - auto* shader_engine = Shader::GetEngine(); - shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - static_cast<void*>(&immediate_input)); - Shader::UnitState shader_unit; - Shader::AttributeBuffer output{}; - - shader_unit.LoadInput(regs.vs, immediate_input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, output); - - // Send to geometry pipeline - if (g_state.immediate.reset_geometry_pipeline) { - g_state.geometry_pipeline.Reconfigure(); - g_state.immediate.reset_geometry_pipeline = false; - } - ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); - g_state.geometry_pipeline.Setup(shader_engine); - g_state.geometry_pipeline.SubmitVertex(output); - - // TODO: If drawing after every immediate mode triangle kills performance, - // change it to flush triangles whenever a drawing config register changes - // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, - nullptr); - } - } - } - } + case 
PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): + LoadDefaultVertexAttributes(value); break; - } case PICA_REG_INDEX(pipeline.gpu_mode): // This register likely just enables vertex processing and doesn't need any special handling @@ -275,136 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // It seems like these trigger vertex rendering case PICA_REG_INDEX(pipeline.trigger_draw): - case PICA_REG_INDEX(pipeline.trigger_draw_indexed): { - MICROPROFILE_SCOPE(GPU_Drawing); - -#if PICA_LOG_TEV - DebugUtils::DumpTevStageConfig(regs.GetTevStages()); -#endif - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - - // Processes information about internal vertex attributes to figure out how a vertex is - // loaded. - // Later, these can be compiled and cached. - const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress(); - VertexLoader loader(regs.pipeline); - - // Load vertices - bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); - - const auto& index_info = regs.pipeline.index_array; - const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); - const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); - bool index_u16 = index_info.format != 0; - - PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; - - if (g_debug_context && g_debug_context->recorder) { - for (int i = 0; i < 3; ++i) { - const auto texture = regs.texturing.GetTextures()[i]; - if (!texture.enabled) - continue; - - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - g_debug_context->recorder->MemoryAccessed( - texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) * - texture.config.width / 2 * texture.config.height, - texture.config.GetPhysicalAddress()); - } - } - - DebugUtils::MemoryAccessTracker memory_accesses; - - // 
Simple circular-replacement vertex cache - // The size has been tuned for optimal balance between hit-rate and the cost of lookup - const size_t VERTEX_CACHE_SIZE = 32; - std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; - std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; - Shader::AttributeBuffer vs_output; - - unsigned int vertex_cache_pos = 0; - vertex_cache_ids.fill(-1); - - auto* shader_engine = Shader::GetEngine(); - Shader::UnitState shader_unit; - - shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); - - g_state.geometry_pipeline.Reconfigure(); - g_state.geometry_pipeline.Setup(shader_engine); - if (g_state.geometry_pipeline.NeedIndexInput()) - ASSERT(is_indexed); - - for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { - // Indexed rendering doesn't use the start offset - unsigned int vertex = - is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) - : (index + regs.pipeline.vertex_offset); - - // -1 is a common special value used for primitive restart. Since it's unknown if - // the PICA supports it, and it would mess up the caching, guard against it here. - ASSERT(vertex != -1); - - bool vertex_cache_hit = false; - - if (is_indexed) { - if (g_state.geometry_pipeline.NeedIndexInput()) { - g_state.geometry_pipeline.SubmitIndex(vertex); - continue; - } - - if (g_debug_context && Pica::g_debug_context->recorder) { - int size = index_u16 ? 
2 : 1; - memory_accesses.AddAccess(base_address + index_info.offset + size * index, - size); - } - - for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { - if (vertex == vertex_cache_ids[i]) { - vs_output = vertex_cache[i]; - vertex_cache_hit = true; - break; - } - } - } - - if (!vertex_cache_hit) { - // Initialize data for the current vertex - Shader::AttributeBuffer input; - loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - - // Send to vertex shader - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, - (void*)&input); - shader_unit.LoadInput(regs.vs, input); - shader_engine->Run(g_state.vs, shader_unit); - shader_unit.WriteOutput(regs.vs, vs_output); - - if (is_indexed) { - vertex_cache[vertex_cache_pos] = vs_output; - vertex_cache_ids[vertex_cache_pos] = vertex; - vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; - } - } - - // Send to geometry pipeline - g_state.geometry_pipeline.SubmitVertex(vs_output); - } - - for (auto& range : memory_accesses.ranges) { - g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), - range.second, range.first); - } - - VideoCore::g_renderer->Rasterizer()->DrawTriangles(); - if (g_debug_context) { - g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - + case PICA_REG_INDEX(pipeline.trigger_draw_indexed): + Draw(id); break; - } case PICA_REG_INDEX(gs.bool_uniforms): WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); |