diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 22 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 8 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 2 | 
8 files changed, 103 insertions, 7 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 988a6433e..cc1f90de6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -478,7 +478,9 @@ public:                  u32 depth_write_enabled; -                INSERT_PADDING_WORDS(0x8); +                INSERT_PADDING_WORDS(0x7); + +                u32 d3d_cull_mode;                  BitField<0, 3, ComparisonOp> depth_test_func; @@ -498,7 +500,13 @@ public:                      u32 enable[NumRenderTargets];                  } blend; -                INSERT_PADDING_WORDS(0x2D); +                INSERT_PADDING_WORDS(0xB); + +                union { +                    BitField<4, 1, u32> triangle_rast_flip; +                } screen_y_control; + +                INSERT_PADDING_WORDS(0x21);                  u32 vb_element_base; @@ -528,7 +536,12 @@ public:                      }                  } tic; -                INSERT_PADDING_WORDS(0x22); +                INSERT_PADDING_WORDS(0x21); + +                union { +                    BitField<2, 1, u32> coord_origin; +                    BitField<3, 10, u32> enable; +                } point_coord_replace;                  struct {                      u32 code_address_high; @@ -818,11 +831,14 @@ ASSERT_REG_POSITION(rt_control, 0x487);  ASSERT_REG_POSITION(depth_test_enable, 0x4B3);  ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);  ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); +ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);  ASSERT_REG_POSITION(depth_test_func, 0x4C3);  ASSERT_REG_POSITION(blend, 0x4CF); +ASSERT_REG_POSITION(screen_y_control, 0x4EB);  ASSERT_REG_POSITION(vb_element_base, 0x50D);  ASSERT_REG_POSITION(tsc, 0x557);  ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(point_coord_replace, 0x581);  ASSERT_REG_POSITION(code_address, 0x582);  ASSERT_REG_POSITION(draw, 0x585);  ASSERT_REG_POSITION(index_array, 0x5F2); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ec9050d3d..3b70efeec 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -329,6 +329,19 @@ union Instruction {      } isetp;      union { +        BitField<0, 3, u64> pred0; +        BitField<3, 3, u64> pred3; +        BitField<12, 3, u64> pred12; +        BitField<15, 1, u64> neg_pred12; +        BitField<24, 2, PredOperation> cond; +        BitField<29, 3, u64> pred29; +        BitField<32, 1, u64> neg_pred29; +        BitField<39, 3, u64> pred39; +        BitField<42, 1, u64> neg_pred39; +        BitField<45, 2, PredOperation> op; +    } psetp; + +    union {          BitField<39, 3, u64> pred39;          BitField<42, 1, u64> neg_pred;          BitField<43, 1, u64> neg_a; @@ -646,7 +659,7 @@ private:              INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),              INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),              INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), -            INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), +            INST("110000----111---", Id::TEX, Type::Memory, "TEX"),              INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),              INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),              INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e516eb1ad..bacb389e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -686,7 +686,10 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,          // Bind the uniform to the sampler.          GLint uniform = glGetUniformLocation(program, entry.GetName().c_str()); -        ASSERT(uniform != -1); +        if (uniform == -1) { +            continue; +        } +          glProgramUniform1i(program, uniform, current_bindpoint);          const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); @@ -771,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() {      if (state.cull.enabled) {          state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);          state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + +        // If the GPU is configured to flip the rasterized triangles, then we need to flip the +        // notion of front and back. Note: We flip the triangles when the value of the register is 0 +        // because OpenGL already does it for us. +        if (regs.screen_y_control.triangle_rast_flip == 0) { +            if (state.cull.front_face == GL_CCW) +                state.cull.front_face = GL_CW; +            else if (state.cull.front_face == GL_CW) +                state.cull.front_face = GL_CCW; +        }      }  } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 50469c05c..57d7763ff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -108,7 +108,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,       false}, // Z24S8      {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, -     false}, // S8Z24 +     false},                                                                            // S8Z24 +    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F  }};  static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -191,7 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),          MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,          MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,          MortonCopy<true, PixelFormat::ASTC_2D_4X4>,  MortonCopy<true, PixelFormat::Z24S8>, -        MortonCopy<true, PixelFormat::S8Z24>, +        MortonCopy<true, PixelFormat::S8Z24>,        MortonCopy<true, PixelFormat::Z32F>,  };  static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), @@ -213,6 +214,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),          MortonCopy<false, PixelFormat::ABGR8>,          MortonCopy<false, PixelFormat::Z24S8>,          MortonCopy<false, PixelFormat::S8Z24>, +        MortonCopy<false, PixelFormat::Z32F>,  };  // Allocate an uninitialized texture of appropriate size and format for the surface diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 8005a81b8..b4d7f8ebe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -42,6 +42,7 @@ struct SurfaceParams {          // DepthStencil formats          Z24S8 = 13,          S8Z24 = 14, +        Z32F = 15,          MaxDepthStencilFormat, @@ -94,6 +95,7 @@ struct SurfaceParams {              4, // ASTC_2D_4X4              1, // Z24S8              1, // S8Z24 +            1, // Z32F          }};          ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -120,6 +122,7 @@ struct SurfaceParams {              32,  // ASTC_2D_4X4              32,  // Z24S8              32,  // S8Z24 +            32,  // Z32F          }};          ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -135,6 +138,8 @@ struct SurfaceParams {              return PixelFormat::S8Z24;          case Tegra::DepthFormat::Z24_S8_UNORM:              return PixelFormat::Z24S8; +        case Tegra::DepthFormat::Z32_FLOAT: +            return PixelFormat::Z32F;          default:              LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));              UNREACHABLE(); @@ -235,6 +240,8 @@ struct SurfaceParams {              return Tegra::DepthFormat::S8_Z24_UNORM;          case PixelFormat::Z24S8:              return Tegra::DepthFormat::Z24_S8_UNORM; +        case PixelFormat::Z32F: +            return Tegra::DepthFormat::Z32_FLOAT;          default:              UNREACHABLE();          } @@ -284,6 +291,8 @@ struct SurfaceParams {          case Tegra::DepthFormat::S8_Z24_UNORM:          case Tegra::DepthFormat::Z24_S8_UNORM:              return ComponentType::UNorm; +        case Tegra::DepthFormat::Z32_FLOAT: +            return ComponentType::Float;          default:              LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));              UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d5259e0b1..e817aca5a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1213,6 +1213,9 @@ private:                  switch (instr.conversion.f2f.rounding) {                  case Tegra::Shader::F2fRoundingOp::None:                      break; +                case Tegra::Shader::F2fRoundingOp::Round: +                    op_a = "roundEven(" + op_a + ')'; +                    break;                  case Tegra::Shader::F2fRoundingOp::Floor:                      op_a = "floor(" + op_a + ')';                      break; @@ -1477,6 +1480,36 @@ private:              }              break;          } +        case OpCode::Type::PredicateSetPredicate: { +            std::string op_a = +                GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); +            std::string op_b = +                GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + +            using Tegra::Shader::Pred; +            // We can't use the constant predicate as destination. +            ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); + +            std::string second_pred = +                GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + +            std::string combiner = GetPredicateCombiner(instr.psetp.op); + +            std::string predicate = +                '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; + +            // Set the primary predicate to the result of Predicate OP SecondPredicate +            SetPredicate(instr.psetp.pred3, +                         '(' + predicate + ") " + combiner + " (" + second_pred + ')'); + +            if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { +                // Set the secondary predicate to the result of !Predicate OP SecondPredicate, +                // if enabled +                SetPredicate(instr.psetp.pred0, +                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); +            } +            break; +        }          case OpCode::Type::FloatSet: {              std::string op_a = instr.fset.neg_a ? "-" : "";              op_a += regs.GetRegisterAsFloat(instr.gpr8); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 6b9bb3df1..6ce53bbd9 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -29,6 +29,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {          switch (attrib.size) {          case Maxwell::VertexAttribute::Size::Size_8_8_8_8:              return GL_UNSIGNED_BYTE; +        case Maxwell::VertexAttribute::Size::Size_16_16: +            return GL_UNSIGNED_SHORT; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return GL_UNSIGNED_INT_2_10_10_10_REV;          }          LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); @@ -41,6 +45,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {          switch (attrib.size) {          case Maxwell::VertexAttribute::Size::Size_8_8_8_8:              return GL_BYTE; +        case Maxwell::VertexAttribute::Size::Size_16_16: +            return GL_SHORT; +        case Maxwell::VertexAttribute::Size::Size_10_10_10_2: +            return GL_INT_2_10_10_10_REV;          }          LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7b06fea3e..d5ab4e4f9 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -78,6 +78,7 @@ static u32 DepthBytesPerPixel(DepthFormat format) {      switch (format) {      case DepthFormat::S8_Z24_UNORM:      case DepthFormat::Z24_S8_UNORM: +    case DepthFormat::Z32_FLOAT:          return 4;      default:          UNIMPLEMENTED_MSG("Format not implemented"); @@ -132,6 +133,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid      switch (format) {      case DepthFormat::S8_Z24_UNORM:      case DepthFormat::Z24_S8_UNORM: +    case DepthFormat::Z32_FLOAT:          CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,                           unswizzled_data.data(), true, block_height);          break;  | 
