diff options
-rw-r--r-- | appveyor.yml | 2 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 14 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 86 |
3 files changed, 87 insertions, 15 deletions
diff --git a/appveyor.yml b/appveyor.yml index a6f12b267..436d98fa1 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -53,7 +53,7 @@ build_script: # https://www.appveyor.com/docs/build-phase msbuild msvc_build/yuzu.sln /maxcpucount /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" } else { - C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -C mingw_build/ 2>&1' + C:\msys64\usr\bin\bash.exe -lc 'mingw32-make -j4 -C mingw_build/ 2>&1' } after_build: diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b038a9d92..8ae0e6df2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -79,6 +79,9 @@ union Attribute { // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. TessCoordInstanceIDVertexID = 47, + // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment + // shader. It is unknown what the other values contain. + FrontFacing = 63, }; union { @@ -214,6 +217,11 @@ enum class FlowCondition : u64 { Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? }; +enum class PredicateResultMode : u64 { + None = 0x0, + NotZero = 0x3, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -254,7 +262,7 @@ union Instruction { BitField<39, 1, u64> invert_a; BitField<40, 1, u64> invert_b; BitField<41, 2, LogicOperation> operation; - BitField<44, 2, u64> unk44; + BitField<44, 2, PredicateResultMode> pred_result_mode; BitField<48, 3, Pred> pred48; } lop; @@ -513,6 +521,8 @@ public: LD_A, LD_C, ST_A, + LDG, // Load from global memory + STG, // Store in global memory TEX, TEXQ, // Texture Query TEXS, // Texture Fetch with scalar/non-vec4 source/destinations @@ -724,6 +734,8 @@ private: INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bb01b3c27..7ae745356 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -542,6 +542,10 @@ private: // shader. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -756,28 +760,51 @@ private: } void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, - const std::string& op_b) { + const std::string& op_b, + Tegra::Shader::PredicateResultMode predicate_mode, + Tegra::Shader::Pred predicate) { + std::string result{}; switch (logic_op) { case LogicOperation::And: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1); + result = '(' + op_a + " & " + op_b + ')'; break; } case LogicOperation::Or: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1); + result = '(' + op_a + " | " + op_b + ')'; break; } case LogicOperation::Xor: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1); + result = '(' + op_a + " ^ " + op_b + ')'; break; } case LogicOperation::PassB: { - regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1); + result = op_b; break; } default: LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op)); UNREACHABLE(); } + + if (dest != Tegra::Shader::Register::ZeroIndex) { + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + } + + using Tegra::Shader::PredicateResultMode; + // Write the predicate value depending on the predicate mode. + switch (predicate_mode) { + case PredicateResultMode::None: + // Do nothing. + return; + case PredicateResultMode::NotZero: + // Set the predicate to true if the result is not zero. + SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); + break; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}", + static_cast<u32>(predicate_mode)); + UNREACHABLE(); + } } void WriteTexsInstruction(const Instruction& instr, const std::string& coord, @@ -815,6 +842,33 @@ private: shader.AddLine('}'); } + /* + * Emits code to push the input target address to the SSY address stack, incrementing the stack + * top. + */ + void EmitPushToSSYStack(u32 target) { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;"); + shader.AddLine("ssy_stack_top++;"); + --shader.scope; + shader.AddLine('}'); + } + + /* + * Emits code to pop an address from the SSY address stack, setting the jump address to the + * popped address and decrementing the stack top. + */ + void EmitPopFromSSYStack() { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack_top--;"); + shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];"); + shader.AddLine("break;"); + --shader.scope; + shader.AddLine('}'); + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -1099,7 +1153,9 @@ private: if (instr.alu.lop32i.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, + Tegra::Shader::PredicateResultMode::None, + Tegra::Shader::Pred::UnusedIndex); break; } default: { @@ -1165,16 +1221,14 @@ private: case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented"); - ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented"); - if (instr.alu.lop.invert_a) op_a = "~(" + op_a + ')'; if (instr.alu.lop.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } case OpCode::Id::IMNMX_C: @@ -1843,13 +1897,13 @@ private: ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); u32 target = offset + instr.bra.GetBranchTarget(); - shader.AddLine("ssy_target = " + std::to_string(target) + "u;"); + EmitPushToSSYStack(target); break; } case OpCode::Id::SYNC: { // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); - shader.AddLine("{ jmp_to = ssy_target; break; }"); + EmitPopFromSSYStack(); break; } case OpCode::Id::DEPBAR: { @@ -1920,7 +1974,13 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); - shader.AddLine("uint ssy_target = 0u;"); + + // TODO(Subv): Figure out the actual depth of the SSY stack, for now it seems + // unlikely that shaders will use 20 nested SSYs. + constexpr u32 SSY_STACK_SIZE = 20; + shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];"); + shader.AddLine("uint ssy_stack_top = 0u;"); + shader.AddLine("while (true) {"); ++shader.scope; |