diff options
-rw-r--r-- | src/core/hle/kernel/errors.h | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/svc.cpp | 91 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 5 | ||||
-rw-r--r-- | src/core/hle/service/audio/hwopus.cpp | 37 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 7 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvmap.cpp | 72 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 36 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 82 |
8 files changed, 298 insertions, 34 deletions
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index e5fa67ae8..885259618 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -22,6 +22,7 @@ enum { HandleTableFull = 105, InvalidMemoryState = 106, InvalidMemoryPermissions = 108, + InvalidMemoryRange = 110, InvalidThreadPriority = 112, InvalidProcessorId = 113, InvalidHandle = 114, @@ -56,6 +57,7 @@ constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidA constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState); constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel, ErrCodes::InvalidMemoryPermissions); +constexpr ResultCode ERR_INVALID_MEMORY_RANGE(ErrorModule::Kernel, ErrCodes::InvalidMemoryRange); constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle); constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId); constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 3afcce3fe..3e4dd61dc 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -39,6 +39,73 @@ namespace { constexpr bool Is4KBAligned(VAddr address) { return (address & 0xFFF) == 0; } + +// Checks if address + size is greater than the given address +// This can return false if the size causes an overflow of a 64-bit type +// or if the given size is zero. +constexpr bool IsValidAddressRange(VAddr address, u64 size) { + return address + size > address; +} + +// Checks if a given address range lies within a larger address range. 
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    // Both comparisons are done on the last byte of each range, i.e. address_range_end is
+    // treated as exclusive. `address + size - 1` is only meaningful when size is non-zero and
+    // address + size does not wrap; MapUnmapMemorySanityChecks rejects both cases before it
+    // calls the region helpers below.
+    const VAddr end_address = address + size - 1;
+    return address_range_begin <= address && end_address <= address_range_end - 1;
+}
+
+// Checks whether the given range lies within the bounds reported by
+// vm.GetAddressSpaceBaseAddress() and vm.GetAddressSpaceEndAddress().
+bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
+    return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(),
+                                vm.GetAddressSpaceEndAddress());
+}
+
+// Checks whether the given range lies within the bounds reported by
+// vm.GetNewMapRegionBaseAddress() and vm.GetNewMapRegionEndAddress().
+bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
+    return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(),
+                                vm.GetNewMapRegionEndAddress());
+}
+
+// Helper function that performs the common sanity checks for svcMapMemory
+// and svcUnmapMemory. This is doable, as both functions perform their sanitizing
+// in the same order.
+//
+// The checks run in the order below and the first failing one decides the result:
+//   - dst_addr or src_addr not 4KB aligned             -> ERR_INVALID_ADDRESS
+//   - size is zero or not 4KB aligned                  -> ERR_INVALID_SIZE
+//   - dst_addr + size or src_addr + size wraps around  -> ERR_INVALID_ADDRESS_STATE
+//   - source range not inside the address space        -> ERR_INVALID_ADDRESS_STATE
+//   - destination range not inside the new map region  -> ERR_INVALID_MEMORY_RANGE
+//   - destination range overlaps the heap region       -> ERR_INVALID_MEMORY_RANGE
+//   - destination range overlaps the map region        -> ERR_INVALID_MEMORY_RANGE
+// Returns RESULT_SUCCESS when every check passes.
+ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr,
+                                      u64 size) {
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!IsValidAddressRange(dst_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // dst_addr + size cannot wrap here: IsValidAddressRange(dst_addr, size) passed above.
+    const VAddr dst_end_address = dst_addr + size;
+    if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetHeapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // Overlap test against the map region. Base and end must come from the same region;
+    // this check previously paired GetNewMapRegionBaseAddress() with
+    // GetMapRegionEndAddress(), which tests a range belonging to neither region.
+    // NOTE(review): assumes VMManager exposes GetMapRegionBaseAddress() as the counterpart
+    // of GetMapRegionEndAddress() - confirm against the VMManager declaration.
+    if (dst_end_address > vm_manager.GetMapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetMapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return RESULT_SUCCESS;
+}
 } // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -69,15 +136,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) { - return ERR_INVALID_ADDRESS; - } + auto* const current_process = Core::CurrentProcess(); + const auto& vm_manager = current_process->VMManager(); - if (size == 0 || !Is4KBAligned(size)) { - return ERR_INVALID_SIZE; + const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); + if (result != RESULT_SUCCESS) { + return result; } - return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size); + return current_process->MirrorMemory(dst_addr, src_addr, size); } /// Unmaps a region that was previously mapped with svcMapMemory @@ -85,15 +152,15 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) { LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr, src_addr, size); - if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) { - return ERR_INVALID_ADDRESS; - } + auto* const current_process = Core::CurrentProcess(); + const auto& vm_manager = current_process->VMManager(); - if (size == 0 || !Is4KBAligned(size)) { - return ERR_INVALID_SIZE; + const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size); + if (result != RESULT_SUCCESS) { + return result; } - return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size); + return current_process->UnmapMemory(dst_addr, src_addr, size); } /// Connect to an OS service given the port name, returns the handle to the port to out diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 33aed8c23..352ce1725 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -183,13 +183,10 @@ void Thread::ResumeFromWait() { */ static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr 
stack_top, VAddr entry_point, u64 arg) { - memset(&context, 0, sizeof(Core::ARM_Interface::ThreadContext)); - + context = {}; context.cpu_registers[0] = arg; context.pc = entry_point; context.sp = stack_top; - context.pstate = 0; - context.fpcr = 0; } ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name, VAddr entry_point, diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index fc6067e59..7168c6a10 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> #include <cstring> #include <memory> +#include <optional> #include <vector> #include <opus.h> @@ -33,7 +35,8 @@ public: {1, nullptr, "SetContext"}, {2, nullptr, "DecodeInterleavedForMultiStream"}, {3, nullptr, "SetContextForMultiStream"}, - {4, nullptr, "Unknown4"}, + {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance, + "DecodeInterleavedWithPerformance"}, {5, nullptr, "Unknown5"}, {6, nullptr, "Unknown6"}, {7, nullptr, "Unknown7"}, @@ -59,8 +62,31 @@ private: ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); } - bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, - std::vector<opus_int16>& output) { + void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) { + u32 consumed = 0; + u32 sample_count = 0; + u64 performance = 0; + std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); + if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, + performance)) { + IPC::ResponseBuilder rb{ctx, 2}; + // TODO(ogniK): Use correct error code + rb.Push(ResultCode(-1)); + return; + } + IPC::ResponseBuilder rb{ctx, 6}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(consumed); + rb.Push<u64>(performance); + rb.Push<u32>(sample_count); + ctx.WriteBuffer(samples.data(), 
samples.size() * sizeof(s16)); + } + + bool Decoder_DecodeInterleaved( + u32& consumed, u32& sample_count, const std::vector<u8>& input, + std::vector<opus_int16>& output, + std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) { + const auto start_time = std::chrono::high_resolution_clock::now(); std::size_t raw_output_sz = output.size() * sizeof(opus_int16); if (sizeof(OpusHeader) > input.size()) return false; @@ -80,8 +106,13 @@ private: (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)), 0); if (out_sample_count < 0) return false; + const auto end_time = std::chrono::high_resolution_clock::now() - start_time; sample_count = out_sample_count; consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); + if (performance_time.has_value()) { + performance_time->get() = + std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); + } return true; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 884837b17..c41ef7058 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -174,10 +174,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou auto& system_instance = Core::System::GetInstance(); // Remove this memory region from the rasterizer cache. 
- system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset, - itr->second.size); - auto& gpu = system_instance.GPU(); + auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); + ASSERT(cpu_addr); + system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); + params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); buffer_mappings.erase(itr->second.offset); diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index a2287cc1b..43651d8a6 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -11,6 +11,13 @@ namespace Service::Nvidia::Devices { +namespace NvErrCodes { +enum { + OperationNotPermitted = -1, + InvalidValue = -22, +}; +} + nvmap::nvmap() = default; nvmap::~nvmap() = default; @@ -44,7 +51,11 @@ u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& o u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { IocCreateParams params; std::memcpy(&params, input.data(), sizeof(params)); + LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size); + if (!params.size) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } // Create a new nvmap object and obtain a handle to it.
auto object = std::make_shared<Object>(); object->id = next_id++; @@ -55,8 +66,6 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { u32 handle = next_handle++; handles[handle] = std::move(object); - LOG_DEBUG(Service_NVDRV, "size=0x{:08X}", params.size); - params.handle = handle; std::memcpy(output.data(), &params, sizeof(params)); @@ -66,9 +75,29 @@ u32 nvmap::IocCreate(const std::vector<u8>& input, std::vector<u8>& output) { u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) { IocAllocParams params; std::memcpy(&params, input.data(), sizeof(params)); + LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr); + + if (!params.handle) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + + if ((params.align - 1) & params.align) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + + const u32 min_alignment = 0x1000; + if (params.align < min_alignment) { + params.align = min_alignment; + } auto object = GetObject(params.handle); - ASSERT(object); + if (!object) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + + if (object->status == Object::Status::Allocated) { + return static_cast<u32>(NvErrCodes::OperationNotPermitted); + } object->flags = params.flags; object->align = params.align; @@ -76,8 +105,6 @@ u32 nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output) { object->addr = params.addr; object->status = Object::Status::Allocated; - LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.addr); - std::memcpy(output.data(), &params, sizeof(params)); return 0; } @@ -88,8 +115,14 @@ u32 nvmap::IocGetId(const std::vector<u8>& input, std::vector<u8>& output) { LOG_WARNING(Service_NVDRV, "called"); + if (!params.handle) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + auto object = GetObject(params.handle); - ASSERT(object); + if (!object) { + return static_cast<u32>(NvErrCodes::OperationNotPermitted); + } params.id = object->id; @@ -105,7 +138,14 @@ u32
nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output) { auto itr = std::find_if(handles.begin(), handles.end(), [&](const auto& entry) { return entry.second->id == params.id; }); - ASSERT(itr != handles.end()); + if (itr == handles.end()) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + + auto& object = itr->second; + if (object->status != Object::Status::Allocated) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } itr->second->refcount++; @@ -125,8 +165,13 @@ u32 nvmap::IocParam(const std::vector<u8>& input, std::vector<u8>& output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called type={}", params.param); auto object = GetObject(params.handle); - ASSERT(object); - ASSERT(object->status == Object::Status::Allocated); + if (!object) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + + if (object->status != Object::Status::Allocated) { + return static_cast<u32>(NvErrCodes::OperationNotPermitted); + } switch (static_cast<ParamTypes>(params.param)) { case ParamTypes::Size: @@ -163,9 +208,12 @@ u32 nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) { LOG_WARNING(Service_NVDRV, "(STUBBED) called"); auto itr = handles.find(params.handle); - ASSERT(itr != handles.end()); - - ASSERT(itr->second->refcount > 0); + if (itr == handles.end()) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } + if (!itr->second->refcount) { + return static_cast<u32>(NvErrCodes::InvalidValue); + } itr->second->refcount--; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 550ab1148..9a59b65b3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class VmadType : u64 { + Size16_Low = 0, + Size16_High = 1, + Size32 = 2, + Invalid = 3, +}; + +enum class VmadShr : u64 { + Shr7 = 1, + Shr15 = 2, +}; + enum class XmadMode : u64 { None = 0, CLo = 1, @@ -452,6 
+464,7 @@ union Instruction { BitField<48, 16, u64> opcode; union { + BitField<20, 16, u64> imm20_16; BitField<20, 19, u64> imm20_19; BitField<20, 32, s64> imm20_32; BitField<45, 1, u64> negate_b; @@ -493,6 +506,10 @@ union Instruction { } } lop3; + u16 GetImm20_16() const { + return static_cast<u16>(imm20_16); + } + u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; @@ -1017,6 +1034,23 @@ union Instruction { } isberd; union { + BitField<48, 1, u64> signed_a; + BitField<38, 1, u64> is_byte_chunk_a; + BitField<36, 2, VmadType> type_a; + BitField<36, 2, u64> byte_height_a; + + BitField<49, 1, u64> signed_b; + BitField<50, 1, u64> use_register_b; + BitField<30, 1, u64> is_byte_chunk_b; + BitField<28, 2, VmadType> type_b; + BitField<28, 2, u64> byte_height_b; + + BitField<51, 2, VmadShr> shr; + BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) + BitField<47, 1, u64> cc; + } vmad; + + union { BitField<20, 16, u64> imm20_16; BitField<36, 1, u64> product_shift_left; BitField<37, 1, u64> merge_37; @@ -1083,6 +1117,7 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + VMAD, FFMA_IMM, // Fused Multiply and Add FFMA_CR, FFMA_RC, @@ -1320,6 +1355,7 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c82a0dcfa..8dfb49507 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2953,6 +2953,88 @@ private: LOG_WARNING(HW_GPU, "DEPBAR 
instruction is stubbed");
             break;
         }
+        // VMAD: builds the shader-source expression (a * b + c), optionally shifted right,
+        // and writes it to gpr0. Operands a and b are sub-fields unpacked from a register
+        // (b may instead be a 16-bit immediate); c is read from gpr39.
+        case OpCode::Id::VMAD: {
+            const bool signed_a = instr.vmad.signed_a == 1;
+            const bool signed_b = instr.vmad.signed_b == 1;
+            // The result is handled as signed as soon as either multiplicand is signed.
+            const bool result_signed = signed_a || signed_b;
+            // When set (see the VmadType::Invalid case below) this replaces the whole
+            // computed expression.
+            boost::optional<std::string> forced_result;
+
+            // Returns an expression string selecting one sub-field of `op`:
+            //  - is_chunk == false: the 8-bit field at bit offset byte_height * 8;
+            //  - is_chunk == true: the field chosen by `type` (low or high 16 bits).
+            // When is_signed is set the expression is wrapped in "int(...)".
+            // May assign forced_result as a side effect (captured by reference).
+            auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed,
+                              Tegra::Shader::VmadType type, u64 byte_height) {
+                const std::string value = [&]() {
+                    if (!is_chunk) {
+                        const auto offset = static_cast<u32>(byte_height * 8);
+                        return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
+                    }
+                    const std::string zero = "0";
+
+                    switch (type) {
+                    case Tegra::Shader::VmadType::Size16_Low:
+                        return '(' + op + " & 0xffff)";
+                    case Tegra::Shader::VmadType::Size16_High:
+                        return '(' + op + " >> 16)";
+                    case Tegra::Shader::VmadType::Size32:
+                        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
+                        // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
+                        // explanation is found: assert.
+                        UNREACHABLE_MSG("Unimplemented");
+                        return zero;
+                    case Tegra::Shader::VmadType::Invalid:
+                        // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
+                        // testing (even though it's invalid) this makes the whole instruction
+                        // assign zero to target register.
+                        forced_result = boost::make_optional(zero);
+                        return zero;
+                    default:
+                        UNREACHABLE();
+                        return zero;
+                    }
+                }();
+
+                if (is_signed) {
+                    return "int(" + value + ')';
+                }
+                return value;
+            };
+
+            // Operand a always comes from gpr8, read with the last argument false
+            // (presumably "unsigned" - confirm against GetRegisterAsInteger).
+            const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
+                                            instr.vmad.is_byte_chunk_a != 0, signed_a,
+                                            instr.vmad.type_a, instr.vmad.byte_height_a);
+
+            // Operand b is either unpacked from gpr20 or taken from the 16-bit immediate,
+            // which is reinterpreted as s16 before printing when b is signed.
+            std::string op_b;
+            if (instr.vmad.use_register_b) {
+                op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                              instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
+                              instr.vmad.byte_height_b);
+            } else {
+                op_b = '(' +
+                       std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
+                                               : instr.alu.GetImm20_16()) +
+                       ')';
+            }
+
+            const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed);
+
+            std::string result;
+            if (forced_result) {
+                result = *forced_result;
+            } else {
+                result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+
+                // Only Shr7 and Shr15 append a shift; any other field value leaves the
+                // expression unshifted (there is no default case).
+                switch (instr.vmad.shr) {
+                case Tegra::Shader::VmadShr::Shr7:
+                    result = '(' + result + " >> 7)";
+                    break;
+                case Tegra::Shader::VmadShr::Shr15:
+                    result = '(' + result + " >> 15)";
+                    break;
+                }
+            }
+            // Stores the expression in gpr0, forwarding the saturate and cc bits.
+            // NOTE(review): the meaning of the literal positional arguments (1, 1, 1, 0)
+            // is not visible here - check SetRegisterToInteger's declaration.
+            regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
+                                      instr.vmad.saturate == 1, 0, Register::Size::Word,
+                                      instr.vmad.cc);
+            break;
+        }
         default: {
             LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
             UNREACHABLE();
|