diff options
| author | bunnei <bunneidev@gmail.com> | 2018-10-28 13:06:21 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-10-28 13:06:21 -0400 | 
| commit | aa1cf608ed20984c410fc215d9f73937abe76ddc (patch) | |
| tree | 1167c5b03eb2526f2704195a27d50f11430f583c | |
| parent | 4ddbd9bbaf3b2cf674638ac45f5192ecc41cad08 (diff) | |
| parent | e5ca097e32d9ec03b1ba4b5e44e3b6553e3addd4 (diff) | |
Merge pull request #1601 from FernandoS27/shader-precision
Improved Shader accuracy on Vertex and Geometry Shaders.
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 55 | 
1 file changed, 35 insertions, 20 deletions
| diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index dec291a7d..dcf6941b0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -341,10 +341,10 @@ public:       */      void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,                              u64 dest_num_components, u64 value_num_components, -                            bool is_saturated = false, u64 dest_elem = 0) { +                            bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {          SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, -                    dest_num_components, value_num_components, dest_elem); +                    dest_num_components, value_num_components, dest_elem, precise);      }      /** @@ -368,7 +368,7 @@ public:          const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};          SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', -                    dest_num_components, value_num_components, dest_elem); +                    dest_num_components, value_num_components, dest_elem, false);          if (sets_cc) {              const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; @@ -416,7 +416,7 @@ public:              }          }(); -        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); +        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);      }      /** @@ -757,7 +757,8 @@ private:       * @param dest_elem Optional, the destination element to use for the operation.       
*/      void SetRegister(const Register& reg, u64 elem, const std::string& value, -                     u64 dest_num_components, u64 value_num_components, u64 dest_elem) { +                     u64 dest_num_components, u64 value_num_components, u64 dest_elem, +                     bool precise) {          if (reg == Register::ZeroIndex) {              LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");              UNREACHABLE(); @@ -774,7 +775,18 @@ private:              src += GetSwizzle(elem);          } -        shader.AddLine(dest + " = " + src + ';'); +        if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { +            shader.AddLine('{'); +            ++shader.scope; +            // This avoids optimizations of constant propagation and keeps the code as the original +            // Sadly using the precise keyword causes "linking" errors on fragment shaders. +            shader.AddLine("precise float tmp = " + src + ';'); +            shader.AddLine(dest + " = tmp;"); +            --shader.scope; +            shader.AddLine('}'); +        } else { +            shader.AddLine(dest + " = " + src + ';'); +        }      }      /// Build the GLSL register list. 
@@ -1510,8 +1522,9 @@ private:                  ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");                  op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); +                  regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, -                                        instr.alu.saturate_d); +                                        instr.alu.saturate_d, 0, true);                  break;              }              case OpCode::Id::FADD_C: @@ -1519,8 +1532,9 @@ private:              case OpCode::Id::FADD_IMM: {                  op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);                  op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); +                  regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, -                                        instr.alu.saturate_d); +                                        instr.alu.saturate_d, 0, true);                  break;              }              case OpCode::Id::MUFU: { @@ -1528,31 +1542,31 @@ private:                  switch (instr.sub_op) {                  case SubOp::Cos:                      regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  case SubOp::Sin:                      regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  case SubOp::Ex2:                      regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                 
 case SubOp::Lg2:                      regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  case SubOp::Rcp:                      regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  case SubOp::Rsq:                      regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  case SubOp::Sqrt:                      regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, -                                            instr.alu.saturate_d); +                                            instr.alu.saturate_d, 0, true);                      break;                  default:                      LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", @@ -1573,7 +1587,7 @@ private:                  regs.SetRegisterToFloat(instr.gpr0, 0,                                          '(' + condition + ") ? 
min(" + parameters + ") : max(" +                                              parameters + ')', -                                        1, 1); +                                        1, 1, false, 0, true);                  break;              }              case OpCode::Id::RRO_C: @@ -1602,7 +1616,7 @@ private:                  regs.SetRegisterToFloat(instr.gpr0, 0,                                          regs.GetRegisterAsFloat(instr.gpr8) + " * " +                                              GetImmediate32(instr), -                                        1, 1, instr.fmul32.saturate); +                                        1, 1, instr.fmul32.saturate, 0, true);                  break;              }              case OpCode::Id::FADD32I: { @@ -1625,7 +1639,7 @@ private:                      op_b = "-(" + op_b + ')';                  } -                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); +                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);                  break;              }              } @@ -2087,8 +2101,9 @@ private:              }              } -            regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, -                                    instr.alu.saturate_d); +            regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', +                                    1, 1, instr.alu.saturate_d, 0, true); +              break;          }          case OpCode::Type::Hfma2: { | 
