From a63a0daa5e773574019ec521c0a07096efbdcd36 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 3 Jun 2020 18:07:35 -0300 Subject: gl_arb_decompiler: Implement an assembly shader decompiler Emit code compatible with NV_gpu_program5. This should emit code compatible with Fermi, but it wasn't tested on that architecture. Pascal has some issues not present on Turing GPUs. --- src/common/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/common') diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 24b7a083c..0a3e2f4d1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well + "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" + "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" -- cgit v1.2.3 From 4417770ba9a1d48ded255e75c32dcc1005b912c1 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 18:59:01 +0100 Subject: xbyak_abi: Fix ABI_PushRegistersAndAdjustStack Pushing GPRs twice. 
--- src/common/x64/xbyak_abi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/common') diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..d15e1aaf0 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -178,21 +178,17 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b size_t rsp_alignment, size_t needed_frame_size = 0) { s32 subtraction, xmm_offset; ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { code.push(IndexToReg64(static_cast<int>(i))); } } + if (subtraction != 0) { code.sub(code.rsp, subtraction); } - for (int i = 0; i < regs.count(); i++) { - if (regs.test(i) & ABI_ALL_GPRS.test(i)) { - code.push(IndexToReg64(i)); - } - } - for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); -- cgit v1.2.3 From d563017dfe63aaa26e7c08369995838f8b9fdafb Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 18:59:54 +0100 Subject: xbyak_abi: Remove *GPS variants of stack manipulation functions --- src/common/x64/xbyak_abi.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'src/common') diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index d15e1aaf0..1dcd147b6 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -223,40 +223,4 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits } } -inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, - size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - for (std::size_t i = 0; i < regs.size(); ++i) { - if 
(regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); - } - } - - if (subtraction != 0) { - code.sub(code.rsp, subtraction); - } - - return ABI_SHADOW_SPACE; -} - -inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, - size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); - - if (subtraction != 0) { - code.add(code.rsp, subtraction); - } - - // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { - code.pop(IndexToReg64(i)); - } - } -} - } // namespace Common::X64 -- cgit v1.2.3 From 36362e9695a988e4727adcfb52f265394d55e8d5 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 19:02:06 +0100 Subject: xbyak_abi: Register indexes should be unsigned --- src/common/x64/xbyak_abi.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'src/common') diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 1dcd147b6..33a96d6cb 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@ namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 
0 : 16); } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); - return Xbyak::Reg64(reg_index); + return Xbyak::Reg64(static_cast<int>(reg_index)); } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); - return Xbyak::Xmm(reg_index - 16); + return Xbyak::Xmm(static_cast<int>(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -181,7 +181,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { - code.push(IndexToReg64(static_cast<int>(i))); + code.push(IndexToReg64(i)); } } @@ -191,7 +191,7 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); + code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(i)); xmm_offset += 0x10; } } @@ -206,7 +206,7 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); + code.movaps(IndexToXmm(i), code.xword[code.rsp + xmm_offset]); xmm_offset += 0x10; } } @@ -216,8 +216,9 @@ inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bits } // GPRs need to be popped in reverse order - for (int i = 15; i >= 0; i--) { - if (regs[i]) { + for (std::size_t j = 0; j < regs.size(); ++j) { + const std::size_t i = regs.size() - j - 1; + if (regs[i] && ABI_ALL_GPRS[i]) { code.pop(IndexToReg64(i)); } } -- cgit v1.2.3 From 
7c6203dc5ed9a8b5ae9154602d27717569851571 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 19:04:29 +0100 Subject: xbyak_abi: Prefer returning a struct to using out parameters in ABI_CalculateFrameSize --- src/common/x64/xbyak_abi.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'src/common') diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 33a96d6cb..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; #endif -inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, - size_t needed_frame_size, s32* out_subtraction, - s32* out_xmm_offset) { +struct ABIFrameInfo { + s32 subtraction; + s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, + size_t needed_frame_size) { const auto count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; size_t subtraction = 0; @@ -170,14 +174,13 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, rsp_alignment -= subtraction; subtraction += rsp_alignment & 0xF; - *out_subtraction = (s32)subtraction; - *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); + return ABIFrameInfo{static_cast<s32>(subtraction), + static_cast<s32>(subtraction - xmm_base_subtraction)}; } inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_GPRS[i]) { @@ -185,14 +188,14 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b } } - if (subtraction != 0) { - 
code.sub(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.sub(code.rsp, frame_info.subtraction); } for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(i)); - xmm_offset += 0x10; + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; } } @@ -201,18 +204,17 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, size_t rsp_alignment, size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (std::size_t i = 0; i < regs.size(); ++i) { if (regs[i] && ABI_ALL_XMMS[i]) { - code.movaps(IndexToXmm(i), code.xword[code.rsp + xmm_offset]); - xmm_offset += 0x10; + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; } } - if (subtraction != 0) { - code.add(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.add(code.rsp, frame_info.subtraction); } // GPRs need to be popped in reverse order -- cgit v1.2.3 From d6474b4aca7f054d00df350c716709475ef0f49b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 16 May 2020 07:24:57 -0400 Subject: common/cpu_detect: Add AVX512 detection --- src/common/x64/cpu_detect.cpp | 5 +++++ src/common/x64/cpu_detect.h | 1 + 2 files changed, 6 insertions(+) (limited to 'src/common') diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index c9349a6b4..f35dcb498 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -110,6 +110,11 @@ static CPUCaps Detect() { caps.bmi1 = true; if ((cpu_id[1] >> 8) & 1) caps.bmi2 = true; + // Checks 
for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) + if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && + (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { + caps.avx512 = caps.avx2; + } } } diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 20f2ba234..7606c3f7b 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -19,6 +19,7 @@ struct CPUCaps { bool lzcnt; bool avx; bool avx2; + bool avx512; bool bmi1; bool bmi2; bool fma; -- cgit v1.2.3 From 97ba520434cceb42af3b17a59c731dd734e9108f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 16 May 2020 07:25:13 -0400 Subject: common/telemetry: Add AVX512 to telemetry --- src/common/telemetry.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/common') diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 200c6489a..16d42facd 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) { fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); + fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); -- cgit v1.2.3