diff options
| -rwxr-xr-x | hooks/pre-commit | 6 | ||||
| -rw-r--r-- | src/core/file_sys/disk_archive.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/pica.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/pica.h | 35 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 58 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 23 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 6 | 
11 files changed, 134 insertions, 37 deletions
diff --git a/hooks/pre-commit b/hooks/pre-commit index bad84b14b..c100bb634 100755 --- a/hooks/pre-commit +++ b/hooks/pre-commit @@ -3,8 +3,10 @@  # Enforce citra's whitespace policy  git config --local core.whitespace tab-in-indent,trailing-space +paths_to_check="src/ CMakeLists.txt" +  # If there are whitespace errors, print the offending file names and fail. -if ! git diff --cached --check; then +if ! git diff --cached --check -- $paths_to_check ; then      cat<<END;  Error: This commit would contain trailing spaces or tabs, which is against this repo's policy. @@ -15,7 +17,7 @@ END  fi  # Check for tabs, since tab-in-indent catches only those at the beginning of a line -if git diff --cached | egrep '^\+.*	'; then +if git diff --cached -- $paths_to_check | egrep '^\+.*	'; then      cat<<END;  Error: This commit would contain a tab, which is against this repo's policy.  If you know what you are doing, you can try 'git commit --no-verify' to bypass the check. diff --git a/src/core/file_sys/disk_archive.cpp b/src/core/file_sys/disk_archive.cpp index 1096fd34d..e9ecd2b1c 100644 --- a/src/core/file_sys/disk_archive.cpp +++ b/src/core/file_sys/disk_archive.cpp @@ -102,7 +102,7 @@ bool DiskFile::Open() {      mode_string += "b";      file = Common::make_unique<FileUtil::IOFile>(path, mode_string.c_str()); -    return true; +    return file->IsOpen();  }  size_t DiskFile::Read(const u64 offset, const size_t length, u8* buffer) const { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index a78985510..682be89ec 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -235,7 +235,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {              for (unsigned int index = 0; index < regs.num_vertices; ++index)              { -                unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; +                // Indexed rendering doesn't use the start offset +                unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset);                  // -1 is a common special value used for primitive restart. Since it's unknown if                  // the PICA supports it, and it would mess up the caching, guard against it here. diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index c73a8178e..61983bc6c 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -49,11 +49,13 @@ std::string Regs::GetCommandName(int index) {          ADD_FIELD(vertex_attributes);          ADD_FIELD(index_array);          ADD_FIELD(num_vertices); +        ADD_FIELD(vertex_offset);          ADD_FIELD(trigger_draw);          ADD_FIELD(trigger_draw_indexed);          ADD_FIELD(vs_default_attributes_setup);          ADD_FIELD(command_buffer);          ADD_FIELD(triangle_topology); +        ADD_FIELD(restart_primitive);          ADD_FIELD(gs.bool_uniforms);          ADD_FIELD(gs.int_uniforms);          ADD_FIELD(gs.main_offset); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index bb689f2a9..855cb442e 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -441,8 +441,14 @@ struct Regs {      };      enum class StencilAction : u32 { -        Keep = 0, -        Xor  = 5, +        Keep           = 0, +        Zero           = 1, +        Replace        = 2, +        Increment      = 3, +        Decrement      = 4, +        Invert         = 5, +        IncrementWrap  = 6, +        DecrementWrap  = 7      };      struct { @@ -481,23 +487,29 @@ struct Regs {          struct {              union { +                // Raw value of this register +                u32 raw_func; +                  // If true, enable stencil testing                  BitField< 0, 1, u32> enable;                  // Comparison operation for stencil testing                  BitField< 4, 3, CompareFunc> func; -                // Value to calculate the new stencil value from -                BitField< 8, 8, u32> replacement_value; +                // Mask used to control writing to the stencil buffer +                BitField< 8, 8, u32> write_mask;                  // Value to compare against for stencil testing                  BitField<16, 8, u32> reference_value;                  // Mask to apply on stencil test inputs -                BitField<24, 8, u32> mask; +                BitField<24, 8, u32> input_mask;              };              union { +                // Raw value of this register +                u32 raw_op; +                  // Action to perform when the stencil test fails                  BitField< 0, 3, StencilAction> action_stencil_fail; @@ -757,7 +769,12 @@ struct Regs {      // Number of vertices to render      u32 num_vertices; -    INSERT_PADDING_WORDS(0x5); +    INSERT_PADDING_WORDS(0x1); + +    // The index of the first vertex to render +    u32 vertex_offset; + +    INSERT_PADDING_WORDS(0x3);      // These two trigger rendering of triangles      u32 trigger_draw; @@ -811,7 +828,9 @@ struct Regs {      BitField<8, 2, TriangleTopology> triangle_topology; -    INSERT_PADDING_WORDS(0x21); +    u32 restart_primitive; + +    INSERT_PADDING_WORDS(0x20);      struct ShaderConfig {          BitField<0, 16, u32> bool_uniforms; @@ -980,11 +999,13 @@ ASSERT_REG_POSITION(framebuffer, 0x110);  ASSERT_REG_POSITION(vertex_attributes, 0x200);  ASSERT_REG_POSITION(index_array, 0x227);  ASSERT_REG_POSITION(num_vertices, 0x228); +ASSERT_REG_POSITION(vertex_offset, 0x22a);  ASSERT_REG_POSITION(trigger_draw, 0x22e);  ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);  ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);  ASSERT_REG_POSITION(command_buffer, 0x238);  ASSERT_REG_POSITION(triangle_topology, 0x25e); +ASSERT_REG_POSITION(restart_primitive, 0x25f);  ASSERT_REG_POSITION(gs, 0x280);  ASSERT_REG_POSITION(vs, 0x2b0); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 4a159da8e..77eadda9e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -216,14 +216,33 @@ static void SetStencil(int x, int y, u8 value) {      }  } -// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! -static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { +static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) {      switch (action) {      case Regs::StencilAction::Keep: -        return dest; +        return old_stencil; -    case Regs::StencilAction::Xor: -        return dest ^ ref; +    case Regs::StencilAction::Zero: +        return 0; + +    case Regs::StencilAction::Replace: +        return ref; + +    case Regs::StencilAction::Increment: +        // Saturated increment +        return std::min<u8>(old_stencil, 254) + 1; + +    case Regs::StencilAction::Decrement: +        // Saturated decrement +        return std::max<u8>(old_stencil, 1) - 1; + +    case Regs::StencilAction::Invert: +        return ~old_stencil; + +    case Regs::StencilAction::IncrementWrap: +        return old_stencil + 1; + +    case Regs::StencilAction::DecrementWrap: +        return old_stencil - 1;      default:          LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); @@ -783,10 +802,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,              }              u8 old_stencil = 0; + +            auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { +                u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); +                SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); +            }; +              if (stencil_action_enable) {                  old_stencil = GetStencil(x >> 4, y >> 4); -                u8 dest = old_stencil & stencil_test.mask; -                u8 ref = stencil_test.reference_value & stencil_test.mask; +                u8 dest = old_stencil & stencil_test.input_mask; +                u8 ref = stencil_test.reference_value & stencil_test.input_mask;                  bool pass = false;                  switch (stencil_test.func) { @@ -824,8 +849,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,                  }                  if (!pass) { -                    u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); -                    SetStencil(x >> 4, y >> 4, new_stencil); +                    UpdateStencil(stencil_test.action_stencil_fail);                      continue;                  }              } @@ -875,23 +899,19 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,                  }                  if (!pass) { -                    if (stencil_action_enable) { -                        u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); -                        SetStencil(x >> 4, y >> 4, new_stencil); -                    } +                    if (stencil_action_enable) +                        UpdateStencil(stencil_test.action_depth_fail);                      continue;                  }                  if (output_merger.depth_write_enable)                      SetDepth(x >> 4, y >> 4, z); - -                if (stencil_action_enable) { -                    // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? -                    u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); -                    SetStencil(x >> 4, y >> 4, new_stencil); -                }              } +            // The stencil depth_pass action is executed even if depth testing is disabled +            if (stencil_action_enable) +                UpdateStencil(stencil_test.action_depth_pass); +              auto dest = GetPixel(x >> 4, y >> 4);              Math::Vec4<u8> blend_output = combiner_output; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f0ccc2397..d29049508 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -269,7 +269,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {          break;      // Stencil test -    case PICA_REG_INDEX(output_merger.stencil_test): +    case PICA_REG_INDEX(output_merger.stencil_test.raw_func): +    case PICA_REG_INDEX(output_merger.stencil_test.raw_op):          SyncStencilTest();          break; @@ -676,7 +677,15 @@ void RasterizerOpenGL::SyncLogicOp() {  }  void RasterizerOpenGL::SyncStencilTest() { -    // TODO: Implement stencil test, mask, and op +    const auto& regs = Pica::g_state.regs; +    state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; +    state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); +    state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; +    state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; +    state.stencil.write_mask = regs.output_merger.stencil_test.write_mask; +    state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); +    state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); +    state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);  }  void RasterizerOpenGL::SyncDepthTest() { @@ -867,8 +876,15 @@ void RasterizerOpenGL::ReloadDepthBuffer() {      state.Apply();      glActiveTexture(GL_TEXTURE0); -    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, -                    fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); +    if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { +        // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer. +        // The bug has been reported to Intel (https://communities.intel.com/message/324464) +        glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0, +            GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get()); +    } else { +        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, +            fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); +    }      state.texture_units[0].texture_2d = 0;      state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 871324014..ba47ce8b8 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -26,6 +26,9 @@ OpenGLState::OpenGLState() {      stencil.test_ref = 0;      stencil.test_mask = -1;      stencil.write_mask = -1; +    stencil.action_depth_fail = GL_KEEP; +    stencil.action_depth_pass = GL_KEEP; +    stencil.action_stencil_fail = GL_KEEP;      blend.enabled = false;      blend.src_rgb_func = GL_ONE; @@ -105,6 +108,12 @@ void OpenGLState::Apply() {          glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask);      } +    if (stencil.action_depth_fail != cur_state.stencil.action_depth_fail || +            stencil.action_depth_pass != cur_state.stencil.action_depth_pass || +            stencil.action_stencil_fail != cur_state.stencil.action_stencil_fail) { +        glStencilOp(stencil.action_stencil_fail, stencil.action_depth_fail, stencil.action_depth_pass); +    } +      // Stencil mask      if (stencil.write_mask != cur_state.stencil.write_mask) {          glStencilMask(stencil.write_mask); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 3e2379021..81e7e0877 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -32,6 +32,9 @@ public:          GLint test_ref; // GL_STENCIL_REF          GLuint test_mask; // GL_STENCIL_VALUE_MASK          GLuint write_mask; // GL_STENCIL_WRITEMASK +        GLenum action_stencil_fail; // GL_STENCIL_FAIL +        GLenum action_depth_fail; // GL_STENCIL_PASS_DEPTH_FAIL +        GLenum action_depth_pass; // GL_STENCIL_PASS_DEPTH_PASS      } stencil;      struct { diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 3b562da86..12806fad5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -152,6 +152,29 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {      return compare_func_table[(unsigned)func];  } +inline GLenum StencilOp(Pica::Regs::StencilAction action) { +    static const GLenum stencil_op_table[] = { +        GL_KEEP,        // StencilAction::Keep +        GL_ZERO,        // StencilAction::Zero +        GL_REPLACE,     // StencilAction::Replace +        GL_INCR,        // StencilAction::Increment +        GL_DECR,        // StencilAction::Decrement +        GL_INVERT,      // StencilAction::Invert +        GL_INCR_WRAP,   // StencilAction::IncrementWrap +        GL_DECR_WRAP    // StencilAction::DecrementWrap +    }; + +    // Range check table for input +    if ((unsigned)action >= ARRAY_SIZE(stencil_op_table)) { +        LOG_CRITICAL(Render_OpenGL, "Unknown stencil op %d", action); +        UNREACHABLE(); + +        return GL_KEEP; +    } + +    return stencil_op_table[(unsigned)action]; +} +  inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) {      return { { bytes[0] / 255.0f,                 bytes[1] / 255.0f, diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d3cfe109e..c7b63a9b7 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -434,10 +434,10 @@ void JitCompiler::Compile_SGE(Instruction instr) {          Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);      } -    CMPPS(SRC1, R(SRC2), CMP_NLT); -    ANDPS(SRC1, R(ONE)); +    CMPPS(SRC2, R(SRC1), CMP_LE); +    ANDPS(SRC2, R(ONE)); -    Compile_DestEnable(instr, SRC1); +    Compile_DestEnable(instr, SRC2);  }  void JitCompiler::Compile_SLT(Instruction instr) {  | 
