diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/citra_qt/CMakeLists.txt | 8 | ||||
-rw-r--r-- | src/citra_qt/debugger/callstack.ui | 3 | ||||
-rw-r--r-- | src/common/math_util.h | 2 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/arm_regformat.h | 2 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp.cpp | 4 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp.h | 1 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpinstr.cpp | 18 | ||||
-rw-r--r-- | src/core/hle/function_wrappers.h | 17 | ||||
-rw-r--r-- | src/core/hle/kernel/event.cpp | 3 | ||||
-rw-r--r-- | src/core/hle/kernel/kernel.cpp | 22 | ||||
-rw-r--r-- | src/core/hle/kernel/kernel.h | 6 | ||||
-rw-r--r-- | src/core/hle/kernel/mutex.cpp | 9 | ||||
-rw-r--r-- | src/core/hle/kernel/semaphore.cpp | 8 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 76 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.h | 14 | ||||
-rw-r--r-- | src/core/hle/svc.cpp | 26 | ||||
-rw-r--r-- | src/core/hw/hw.cpp | 2 | ||||
-rw-r--r-- | src/video_core/pica.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 154 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 36 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 8 |
22 files changed, 237 insertions, 195 deletions
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index c05779380..c2d1ad240 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -91,10 +91,10 @@ if (Qt5_FOUND AND MSVC) set(PLATFORMS ${DLL_DEST}platforms/) # windows commandline expects the / to be \ so switch them - string(REPLACE "/" "\\" Qt5_DLL_DIR ${Qt5_DLL_DIR}) - string(REPLACE "/" "\\" Qt5_PLATFORMS_DIR ${Qt5_PLATFORMS_DIR}) - string(REPLACE "/" "\\" DLL_DEST ${DLL_DEST}) - string(REPLACE "/" "\\" PLATFORMS ${PLATFORMS}) + string(REPLACE "/" "\\\\" Qt5_DLL_DIR ${Qt5_DLL_DIR}) + string(REPLACE "/" "\\\\" Qt5_PLATFORMS_DIR ${Qt5_PLATFORMS_DIR}) + string(REPLACE "/" "\\\\" DLL_DEST ${DLL_DEST}) + string(REPLACE "/" "\\\\" PLATFORMS ${PLATFORMS}) # /NJH /NJS /NDL /NFL /NC /NS /NP - Silence any output # cmake adds an extra check for command success which doesn't work too well with robocopy diff --git a/src/citra_qt/debugger/callstack.ui b/src/citra_qt/debugger/callstack.ui index b0e31120f..248ea3dd7 100644 --- a/src/citra_qt/debugger/callstack.ui +++ b/src/citra_qt/debugger/callstack.ui @@ -17,6 +17,9 @@ <layout class="QVBoxLayout" name="verticalLayout"> <item> <widget class="QTreeView" name="treeView"> + <property name="editTriggers"> + <set>QAbstractItemView::NoEditTriggers</set> + </property> <property name="alternatingRowColors"> <bool>true</bool> </property> diff --git a/src/common/math_util.h b/src/common/math_util.h index 4b0910741..d44b06e74 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -12,7 +12,7 @@ namespace MathUtil { inline bool IntervalsIntersect(unsigned start0, unsigned length0, unsigned start1, unsigned length1) { - return (std::max(start0, start1) <= std::min(start0 + length0, start1 + length1)); + return (std::max(start0, start1) < std::min(start0 + length0, start1 + length1)); } template<typename T> diff --git a/src/core/arm/skyeye_common/arm_regformat.h b/src/core/arm/skyeye_common/arm_regformat.h index 6c89774eb..a92effbb4 100644 --- a/src/core/arm/skyeye_common/arm_regformat.h +++ b/src/core/arm/skyeye_common/arm_regformat.h @@ -59,6 +59,8 @@ enum { VFP_FPSID, VFP_FPSCR, VFP_FPEXC, + VFP_MVFR0, + VFP_MVFR1, // Not an actual register. // All VFP system registers should be defined above this. diff --git a/src/core/arm/skyeye_common/vfp/vfp.cpp b/src/core/arm/skyeye_common/vfp/vfp.cpp index b88d47750..571d6c2f2 100644 --- a/src/core/arm/skyeye_common/vfp/vfp.cpp +++ b/src/core/arm/skyeye_common/vfp/vfp.cpp @@ -33,6 +33,10 @@ unsigned VFPInit(ARMul_State* state) state->VFP[VFP_FPEXC] = 0; state->VFP[VFP_FPSCR] = 0; + // ARM11 MPCore feature register values. + state->VFP[VFP_MVFR0] = 0x11111111; + state->VFP[VFP_MVFR1] = 0; + return 0; } diff --git a/src/core/arm/skyeye_common/vfp/vfp.h b/src/core/arm/skyeye_common/vfp/vfp.h index 727350f14..acefae9bb 100644 --- a/src/core/arm/skyeye_common/vfp/vfp.h +++ b/src/core/arm/skyeye_common/vfp/vfp.h @@ -22,7 +22,6 @@ #include "core/arm/skyeye_common/vfp/vfp_helper.h" /* for references to cdp SoftFloat functions */ -#define VFP_DEBUG_UNIMPLEMENTED(x) LOG_ERROR(Core_ARM11, "in func %s, " #x " unimplemented\n", __FUNCTION__); exit(-1); #define VFP_DEBUG_UNTESTED(x) LOG_TRACE(Core_ARM11, "in func %s, " #x " untested\n", __FUNCTION__); #define CHECK_VFP_ENABLED #define CHECK_VFP_CDP_RET vfp_raise_exceptions(cpu, ret, inst_cream->instr, cpu->VFP[VFP_FPSCR]); diff --git a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp index 3ed918a93..67fe63aa4 100644 --- a/src/core/arm/skyeye_common/vfp/vfpinstr.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpinstr.cpp @@ -1068,10 +1068,12 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbrc)(unsigned int inst, int index) #ifdef VFP_INTERPRETER_IMPL VMOVBRC_INST: { - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { CHECK_VFP_ENABLED; - VFP_DEBUG_UNIMPLEMENTED(VMOVBRC); + vmovbrc_inst* const inst_cream = (vmovbrc_inst*)inst_base->component; + + cpu->ExtReg[(2 * inst_cream->d) + inst_cream->index] = cpu->Reg[inst_cream->t]; } cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(vmovbrc_inst)); @@ -1139,12 +1141,10 @@ VMRS_INST: cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_FPSID]; break; case 6: - /* MVFR1, VFPv3 only ? */ - LOG_TRACE(Core_ARM11, "\tr%d <= MVFR1 unimplemented\n", inst_cream->Rt); + cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_MVFR1]; break; case 7: - /* MVFR0, VFPv3 only? */ - LOG_TRACE(Core_ARM11, "\tr%d <= MVFR0 unimplemented\n", inst_cream->Rt); + cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_MVFR0]; break; case 8: cpu->Reg[inst_cream->Rt] = cpu->VFP[VFP_FPEXC]; @@ -1195,10 +1195,12 @@ static ARM_INST_PTR INTERPRETER_TRANSLATE(vmovbcr)(unsigned int inst, int index) #ifdef VFP_INTERPRETER_IMPL VMOVBCR_INST: { - if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { CHECK_VFP_ENABLED; - VFP_DEBUG_UNIMPLEMENTED(VMOVBCR); + vmovbcr_inst* const inst_cream = (vmovbcr_inst*) inst_base->component; + + cpu->Reg[inst_cream->t] = cpu->ExtReg[(2 * inst_cream->d) + inst_cream->index]; } cpu->Reg[15] += GET_INST_SIZE(cpu); INC_PC(sizeof(vmovbcr_inst)); diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h index 23c86a72d..5949cb470 100644 --- a/src/core/hle/function_wrappers.h +++ b/src/core/hle/function_wrappers.h @@ -9,11 +9,15 @@ #include "core/arm/arm_interface.h" #include "core/memory.h" #include "core/hle/hle.h" +#include "core/hle/result.h" namespace HLE { #define PARAM(n) Core::g_app_core->GetReg(n) +/// An invalid result code that is meant to be overwritten when a thread resumes from waiting +static const ResultCode RESULT_INVALID(0xDEADC0DE); + /** * HLE a function return from the current ARM11 userland process * @param res Result to return @@ -57,8 +61,11 @@ template<ResultCode func(s32*, u32*, s32, bool, s64)> void Wrap() { s32 param_1 = 0; s32 retval = func(¶m_1, (Handle*)Memory::GetPointer(PARAM(1)), (s32)PARAM(2), (PARAM(3) != 0), (((s64)PARAM(4) << 32) | PARAM(0))).raw; - Core::g_app_core->SetReg(1, (u32)param_1); - FuncReturn(retval); + + if (retval != RESULT_INVALID.raw) { + Core::g_app_core->SetReg(1, (u32)param_1); + FuncReturn(retval); + } } template<ResultCode func(u32, u32, u32, u32, s64)> void Wrap() { @@ -73,7 +80,11 @@ template<ResultCode func(u32*)> void Wrap(){ } template<ResultCode func(u32, s64)> void Wrap() { - FuncReturn(func(PARAM(0), (((s64)PARAM(3) << 32) | PARAM(2))).raw); + s32 retval = func(PARAM(0), (((s64)PARAM(3) << 32) | PARAM(2))).raw; + + if (retval != RESULT_INVALID.raw) { + FuncReturn(retval); + } } template<ResultCode func(void*, void*, u32)> void Wrap(){ diff --git a/src/core/hle/kernel/event.cpp b/src/core/hle/kernel/event.cpp index e45deb1c6..f338f3266 100644 --- a/src/core/hle/kernel/event.cpp +++ b/src/core/hle/kernel/event.cpp @@ -41,10 +41,7 @@ void Event::Acquire() { void Event::Signal() { signaled = true; - WakeupAllWaitingThreads(); - - HLE::Reschedule(__func__); } void Event::Clear() { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 726e4d2ff..20e11da16 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -32,27 +32,13 @@ void WaitObject::RemoveWaitingThread(Thread* thread) { waiting_threads.erase(itr); } -SharedPtr<Thread> WaitObject::WakeupNextThread() { - if (waiting_threads.empty()) - return nullptr; - - auto next_thread = std::move(waiting_threads.front()); - waiting_threads.erase(waiting_threads.begin()); - - next_thread->ReleaseWaitObject(this); - - return next_thread; -} - void WaitObject::WakeupAllWaitingThreads() { - auto waiting_threads_copy = waiting_threads; + for (auto thread : waiting_threads) + thread->ResumeFromWait(); - // We use a copy because ReleaseWaitObject will remove the thread from this object's - // waiting_threads list - for (auto thread : waiting_threads_copy) - thread->ReleaseWaitObject(this); + waiting_threads.clear(); - ASSERT_MSG(waiting_threads.empty(), "failed to awaken all waiting threads!"); + HLE::Reschedule(__func__); } HandleTable::HandleTable() { diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index a5a0f4800..64595f758 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -140,12 +140,6 @@ public: */ void RemoveWaitingThread(Thread* thread); - /** - * Wake up the next thread waiting on this object - * @return Pointer to the thread that was resumed, nullptr if no threads are waiting - */ - SharedPtr<Thread> WakeupNextThread(); - /// Wake up all threads waiting on this object void WakeupAllWaitingThreads(); diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 6aa73df86..edb97d324 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -23,12 +23,7 @@ static void ResumeWaitingThread(Mutex* mutex) { // Reset mutex lock thread handle, nothing is waiting mutex->lock_count = 0; mutex->holding_thread = nullptr; - - // Find the next waiting thread for the mutex... - auto next_thread = mutex->WakeupNextThread(); - if (next_thread != nullptr) { - mutex->Acquire(next_thread); - } + mutex->WakeupAllWaitingThreads(); } void ReleaseThreadMutexes(Thread* thread) { @@ -94,8 +89,6 @@ void Mutex::Release() { ResumeWaitingThread(this); } } - - HLE::Reschedule(__func__); } } // namespace diff --git a/src/core/hle/kernel/semaphore.cpp b/src/core/hle/kernel/semaphore.cpp index 96d61ed3a..4b359ed07 100644 --- a/src/core/hle/kernel/semaphore.cpp +++ b/src/core/hle/kernel/semaphore.cpp @@ -48,13 +48,7 @@ ResultVal<s32> Semaphore::Release(s32 release_count) { s32 previous_count = available_count; available_count += release_count; - // Notify some of the threads that the semaphore has been released - // stop once the semaphore is full again or there are no more waiting threads - while (!ShouldWait() && WakeupNextThread() != nullptr) { - Acquire(); - } - - HLE::Reschedule(__func__); + WakeupAllWaitingThreads(); return MakeResult<s32>(previous_count); } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 22c795ad4..4729a7fe0 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -13,6 +13,7 @@ #include "common/thread_queue_list.h" #include "core/arm/arm_interface.h" +#include "core/arm/skyeye_common/armdefs.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/hle.h" @@ -193,8 +194,22 @@ static void SwitchContext(Thread* new_thread) { if (new_thread) { DEBUG_ASSERT_MSG(new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running."); + // Cancel any outstanding wakeup events for this thread + CoreTiming::UnscheduleEvent(ThreadWakeupEventType, new_thread->callback_handle); + current_thread = new_thread; + // If the thread was waited by a svcWaitSynch call, step back PC by one instruction to rerun + // the SVC when the thread wakes up. This is necessary to ensure that the thread can acquire + // the requested wait object(s) before continuing. + if (new_thread->waitsynch_waited) { + // CPSR flag indicates CPU mode + bool thumb_mode = (new_thread->context.cpsr & TBIT) != 0; + + // SVC instruction is 2 bytes for THUMB, 4 bytes for ARM + new_thread->context.pc -= thumb_mode ? 2 : 4; + } + ready_queue.remove(new_thread->current_priority, new_thread); new_thread->status = THREADSTATUS_RUNNING; @@ -243,6 +258,7 @@ void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wa thread->wait_set_output = wait_set_output; thread->wait_all = wait_all; thread->wait_objects = std::move(wait_objects); + thread->waitsynch_waited = true; thread->status = THREADSTATUS_WAIT_SYNCH; } @@ -268,6 +284,8 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { return; } + thread->waitsynch_waited = false; + if (thread->status == THREADSTATUS_WAIT_SYNCH) { thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, ErrorSummary::StatusChanged, ErrorLevel::Info)); @@ -288,63 +306,20 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { CoreTiming::ScheduleEvent(usToCycles(microseconds), ThreadWakeupEventType, callback_handle); } -void Thread::ReleaseWaitObject(WaitObject* wait_object) { - if (status != THREADSTATUS_WAIT_SYNCH || wait_objects.empty()) { - LOG_CRITICAL(Kernel, "thread is not waiting on any objects!"); - return; - } - - // Remove this thread from the waiting object's thread list - wait_object->RemoveWaitingThread(this); - - unsigned index = 0; - bool wait_all_failed = false; // Will be set to true if any object is unavailable - - // Iterate through all waiting objects to check availability... - for (auto itr = wait_objects.begin(); itr != wait_objects.end(); ++itr) { - if ((*itr)->ShouldWait()) - wait_all_failed = true; - - // The output should be the last index of wait_object - if (*itr == wait_object) - index = itr - wait_objects.begin(); - } - - // If we are waiting on all objects... - if (wait_all) { - // Resume the thread only if all are available... - if (!wait_all_failed) { - SetWaitSynchronizationResult(RESULT_SUCCESS); - SetWaitSynchronizationOutput(-1); - - ResumeFromWait(); - } - } else { - // Otherwise, resume - SetWaitSynchronizationResult(RESULT_SUCCESS); - - if (wait_set_output) - SetWaitSynchronizationOutput(index); - - ResumeFromWait(); - } -} - void Thread::ResumeFromWait() { - // Cancel any outstanding wakeup events for this thread - CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); - switch (status) { case THREADSTATUS_WAIT_SYNCH: - // Remove this thread from all other WaitObjects - for (auto wait_object : wait_objects) - wait_object->RemoveWaitingThread(this); - break; case THREADSTATUS_WAIT_ARB: case THREADSTATUS_WAIT_SLEEP: break; - case THREADSTATUS_RUNNING: + case THREADSTATUS_READY: + // If the thread is waiting on multiple wait objects, it might be awoken more than once + // before actually resuming. We can ignore subsequent wakeups if the thread status has + // already been set to THREADSTATUS_READY. + return; + + case THREADSTATUS_RUNNING: DEBUG_ASSERT_MSG(false, "Thread with object id %u has already resumed.", GetObjectId()); return; case THREADSTATUS_DEAD: @@ -415,6 +390,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); thread->owner_process = g_current_process; thread->tls_index = -1; + thread->waitsynch_waited = false; // Find the next available TLS index, and mark it as used auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 2c65419c3..b8160bb2c 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -96,12 +96,6 @@ public: u32 GetThreadId() const { return thread_id; } /** - * Release an acquired wait object - * @param wait_object WaitObject to release - */ - void ReleaseWaitObject(WaitObject* wait_object); - - /** * Resumes a thread from waiting */ void ResumeFromWait(); @@ -152,6 +146,8 @@ public: s32 tls_index; ///< Index of the Thread Local Storage of the thread + bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait + /// Mutexes currently held by this thread, which will be released when it exits. boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; @@ -163,12 +159,12 @@ public: std::string name; + /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. + Handle callback_handle; + private: Thread(); ~Thread() override; - - /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. - Handle callback_handle; }; /** diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index d1555c753..6cde4fc87 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -40,9 +40,6 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel, const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E -/// An invalid result code that is meant to be overwritten when a thread resumes from waiting -const ResultCode RESULT_INVALID(0xDEADC0DE); - enum ControlMemoryOperation { MEMORY_OPERATION_HEAP = 0x00000003, MEMORY_OPERATION_GSP_HEAP = 0x00010003, @@ -143,6 +140,10 @@ static ResultCode CloseHandle(Handle handle) { /// Wait for a handle to synchronize, timeout after the specified nanoseconds static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { auto object = Kernel::g_handle_table.GetWaitObject(handle); + Kernel::Thread* thread = Kernel::GetCurrentThread(); + + thread->waitsynch_waited = false; + if (object == nullptr) return ERR_INVALID_HANDLE; @@ -154,14 +155,14 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { // Check for next thread to schedule if (object->ShouldWait()) { - object->AddWaitingThread(Kernel::GetCurrentThread()); + object->AddWaitingThread(thread); Kernel::WaitCurrentThread_WaitSynchronization({ object }, false, false); // Create an event to wake the thread up after the specified nanosecond delay has passed - Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); + thread->WakeAfterDelay(nano_seconds); // NOTE: output of this SVC will be set later depending on how the thread resumes - return RESULT_INVALID; + return HLE::RESULT_INVALID; } object->Acquire(); @@ -173,6 +174,9 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_count, bool wait_all, s64 nano_seconds) { bool wait_thread = !wait_all; int handle_index = 0; + Kernel::Thread* thread = Kernel::GetCurrentThread(); + bool was_waiting = thread->waitsynch_waited; + thread->waitsynch_waited = false; // Check if 'handles' is invalid if (handles == nullptr) @@ -190,6 +194,9 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou // necessary if (handle_count != 0) { bool selected = false; // True once an object has been selected + + Kernel::SharedPtr<Kernel::WaitObject> wait_object; + for (int i = 0; i < handle_count; ++i) { auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); if (object == nullptr) @@ -204,10 +211,11 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou wait_thread = true; } else { // Do not wait on this object, check if this object should be selected... - if (!wait_all && !selected) { + if (!wait_all && (!selected || (wait_object == object && was_waiting))) { // Do not wait the thread wait_thread = false; handle_index = i; + wait_object = object; selected = true; } } @@ -241,7 +249,7 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); // NOTE: output of this SVC will be set later depending on how the thread resumes - return RESULT_INVALID; + return HLE::RESULT_INVALID; } // Acquire objects if we did not wait... @@ -261,7 +269,7 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou // TODO(bunnei): If 'wait_all' is true, this is probably wrong. However, real hardware does // not seem to set it to any meaningful value. - *out = wait_all ? 0 : handle_index; + *out = handle_count != 0 ? (wait_all ? -1 : handle_index) : 0; return RESULT_SUCCESS; } diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index f4906cc7e..c7006a498 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp @@ -18,7 +18,7 @@ inline void Read(T &var, const u32 addr) { GPU::Read(var, addr); break; case VADDR_LCD: - LCD::Write(var, addr); + LCD::Read(var, addr); break; default: LOG_ERROR(HW_Memory, "unknown Read%lu @ 0x%08X", sizeof(var) * 8, addr); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 684ec9818..9628a7589 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -372,9 +372,9 @@ struct Regs { INSERT_PADDING_WORDS(0x2); const std::array<Regs::TevStageConfig,6> GetTevStages() const { - return { tev_stage0, tev_stage1, - tev_stage2, tev_stage3, - tev_stage4, tev_stage5 }; + return {{ tev_stage0, tev_stage1, + tev_stage2, tev_stage3, + tev_stage4, tev_stage5 }}; }; enum class BlendEquation : u32 { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d31c46cca..518f79331 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -94,14 +94,27 @@ void RasterizerOpenGL::InitObjects() { // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation fb_color_texture.texture.Create(); ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1); + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_color_texture.texture.handle; + state.Apply(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + state.texture_units[0].texture_2d = 0; + state.Apply(); + fb_depth_texture.texture.Create(); ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1); + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; + state.Apply(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -110,14 +123,13 @@ void RasterizerOpenGL::InitObjects() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); + state.texture_units[0].texture_2d = 0; + state.Apply(); + // Configure OpenGL framebuffer framebuffer.Create(); state.draw.framebuffer = framebuffer.handle; - - // Unbind texture to allow binding to framebuffer - state.texture_units[0].enabled_2d = true; - state.texture_units[0].texture_2d = 0; state.Apply(); glActiveTexture(GL_TEXTURE0); @@ -205,7 +217,19 @@ void RasterizerOpenGL::DrawTriangles() { vertex_batch.clear(); - // TODO: Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture + // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture + const auto& regs = Pica::g_state.regs; + + PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + + PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + + res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size); + res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size); } void RasterizerOpenGL::CommitFramebuffer() { @@ -472,6 +496,9 @@ void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs: glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) { @@ -491,7 +518,7 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: case Pica::Regs::DepthFormat::D24: internal_format = GL_DEPTH_COMPONENT24; texture.gl_format = GL_DEPTH_COMPONENT; - texture.gl_type = GL_UNSIGNED_INT_24_8; + texture.gl_type = GL_UNSIGNED_INT; break; case Pica::Regs::DepthFormat::D24S8: @@ -513,6 +540,9 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::SyncFramebuffer() { @@ -652,6 +682,10 @@ void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + state.color_mask.red_enabled = regs.output_merger.red_enable; + state.color_mask.green_enabled = regs.output_merger.green_enable; + state.color_mask.blue_enabled = regs.output_merger.blue_enable; + state.color_mask.alpha_enabled = regs.output_merger.alpha_enable; state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; } @@ -759,10 +793,10 @@ void RasterizerOpenGL::ReloadColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_color_texture.width) * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; - memcpy(&temp_fb_color_buffer[gl_px_idx], pixel, bytes_per_pixel); + memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel); } } @@ -773,6 +807,9 @@ void RasterizerOpenGL::ReloadColorBuffer() { glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get()); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::ReloadDepthBuffer() { @@ -790,29 +827,29 @@ void RasterizerOpenGL::ReloadDepthBuffer() { std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_px_idx = x + y * fb_depth_texture.width; - - switch (fb_depth_texture.format) { - case Pica::Regs::DepthFormat::D16: - ((u16*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD16(depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24: - ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD24(depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24S8: - { - Math::Vec2<u32> depth_stencil = Color::DecodeD24S8(depth_buffer + dst_offset); - ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = (depth_stencil.x << 8) | depth_stencil.y; - break; + u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get(); + + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width); + + u8* pixel = depth_buffer + dst_offset; + u32 depth_stencil = *(u32*)pixel; + ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24); } - default: - LOG_CRITICAL(Render_OpenGL, "Unknown memory framebuffer depth format %x", fb_depth_texture.format); - UNIMPLEMENTED(); - break; + } + } else { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + + u8* pixel = depth_buffer + dst_offset; + memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel); } } } @@ -824,6 +861,9 @@ void RasterizerOpenGL::ReloadDepthBuffer() { glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::CommitColorBuffer() { @@ -842,15 +882,18 @@ void RasterizerOpenGL::CommitColorBuffer() { glActiveTexture(GL_TEXTURE0); glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get()); + state.texture_units[0].texture_2d = 0; + state.Apply(); + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. for (int y = 0; y < fb_color_texture.height; ++y) { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; - memcpy(pixel, &temp_gl_color_buffer[gl_px_idx], bytes_per_pixel); + memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel); } } } @@ -877,29 +920,32 @@ void RasterizerOpenGL::CommitDepthBuffer() { glActiveTexture(GL_TEXTURE0); glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get()); - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_px_idx = x + y * fb_depth_texture.width; - - switch (fb_depth_texture.format) { - case Pica::Regs::DepthFormat::D16: - Color::EncodeD16(((u16*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24: - Color::EncodeD24(((u32*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24S8: - { - u32 depth_stencil = ((u32*)temp_gl_depth_buffer.get())[gl_px_idx]; - Color::EncodeD24S8((depth_stencil >> 8), depth_stencil & 0xFF, depth_buffer + dst_offset); - break; + state.texture_units[0].texture_2d = 0; + state.Apply(); + + u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get(); + + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width); + + u8* pixel = depth_buffer + dst_offset; + u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index]; + *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24); } - default: - LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", fb_depth_texture.format); - UNIMPLEMENTED(); - break; + } + } else { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + + u8* pixel = depth_buffer + dst_offset; + memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel); } } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9c5f38f94..3526e16d5 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -16,6 +16,11 @@ OpenGLState::OpenGLState() { depth.test_func = GL_LESS; depth.write_mask = GL_TRUE; + color_mask.red_enabled = GL_TRUE; + color_mask.green_enabled = GL_TRUE; + color_mask.blue_enabled = GL_TRUE; + color_mask.alpha_enabled = GL_TRUE; + stencil.test_enabled = false; stencil.test_func = GL_ALWAYS; stencil.test_ref = 0; @@ -77,6 +82,15 @@ void OpenGLState::Apply() { glDepthMask(depth.write_mask); } + // Color mask + if (color_mask.red_enabled != cur_state.color_mask.red_enabled || + color_mask.green_enabled != cur_state.color_mask.green_enabled || + color_mask.blue_enabled != cur_state.color_mask.blue_enabled || + color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { + glColorMask(color_mask.red_enabled, color_mask.green_enabled, + color_mask.blue_enabled, color_mask.alpha_enabled); + } + // Stencil test if (stencil.test_enabled != cur_state.stencil.test_enabled) { if (stencil.test_enabled) { @@ -87,8 +101,8 @@ void OpenGLState::Apply() { } if (stencil.test_func != cur_state.stencil.test_func || - stencil.test_ref != cur_state.stencil.test_ref || - stencil.test_mask != cur_state.stencil.test_mask) { + stencil.test_ref != cur_state.stencil.test_ref || + stencil.test_mask != cur_state.stencil.test_mask) { glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); } @@ -112,17 +126,19 @@ void OpenGLState::Apply() { } if (blend.color.red != cur_state.blend.color.red || - blend.color.green != cur_state.blend.color.green || - blend.color.blue != cur_state.blend.color.blue || - blend.color.alpha != cur_state.blend.color.alpha) { - glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); + blend.color.green != cur_state.blend.color.green || + blend.color.blue != cur_state.blend.color.blue || + blend.color.alpha != cur_state.blend.color.alpha) { + glBlendColor(blend.color.red, blend.color.green, + blend.color.blue, blend.color.alpha); } if (blend.src_rgb_func != cur_state.blend.src_rgb_func || - blend.dst_rgb_func != cur_state.blend.dst_rgb_func || - blend.src_a_func != cur_state.blend.src_a_func || - blend.dst_a_func != cur_state.blend.dst_a_func) { - glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); + blend.dst_rgb_func != cur_state.blend.dst_rgb_func || + blend.src_a_func != cur_state.blend.src_a_func || + blend.dst_a_func != cur_state.blend.dst_a_func) { + glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, + blend.src_a_func, blend.dst_a_func); } if (logic_op != cur_state.logic_op) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 6b97721d6..26b916360 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,6 +20,13 @@ public: } depth; struct { + GLboolean red_enabled; + GLboolean green_enabled; + GLboolean blue_enabled; + GLboolean alpha_enabled; + } color_mask; // GL_COLOR_WRITEMASK + + struct { bool test_enabled; // GL_STENCIL_TEST GLenum test_func; // GL_STENCIL_FUNC GLint test_ref; // GL_STENCIL_REF diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 382aeaa05..3399ca123 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -170,6 +170,9 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& texture.gl_format, texture.gl_type, framebuffer_data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } /** @@ -239,6 +242,9 @@ void RendererOpenGL::InitOpenGLObjects() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } + state.texture_units[0].texture_2d = 0; + state.Apply(); + hw_rasterizer->InitObjects(); } @@ -370,6 +376,8 @@ void RendererOpenGL::Init() { } LOG_INFO(Render_OpenGL, "GL_VERSION: %s", glGetString(GL_VERSION)); + LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", glGetString(GL_VENDOR)); + LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", glGetString(GL_RENDERER)); InitOpenGLObjects(); } |