diff options
| author | bunnei <bunneidev@gmail.com> | 2018-07-02 21:24:43 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-07-02 21:24:43 -0400 | 
| commit | 92c713506542d5e628a5495943792b11e8de5c20 (patch) | |
| tree | 1b46282f09e46b163be059965aaf1dc410ba1be4 | |
| parent | 055f1546d7d69fdcc06693847aeddda30535b283 (diff) | |
| parent | a6d4903aaff4633bcd5a39ca79ea368f86bc3ce5 (diff) | |
Merge pull request #608 from Subv/depth
GPU: Implemented the depth buffer and depth test + culling
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 56 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 27 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 50 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 6 | 
9 files changed, 246 insertions, 32 deletions
| diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 58db81222..ff67f2a58 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -280,6 +280,34 @@ public:              UnsignedInt = 0x2,          }; +        enum class ComparisonOp : u32 { +            Never = 0x200, +            Less = 0x201, +            Equal = 0x202, +            LessEqual = 0x203, +            Greater = 0x204, +            NotEqual = 0x205, +            GreaterEqual = 0x206, +            Always = 0x207, +        }; + +        struct Cull { +            enum class FrontFace : u32 { +                ClockWise = 0x0900, +                CounterClockWise = 0x0901, +            }; + +            enum class CullFace : u32 { +                Front = 0x0404, +                Back = 0x0405, +                FrontAndBack = 0x0408, +            }; + +            u32 enabled; +            FrontFace front_face; +            CullFace cull_face; +        }; +          struct Blend {              enum class Equation : u32 {                  Add = 1, @@ -413,7 +441,7 @@ public:                  struct {                      u32 address_high;                      u32 address_low; -                    u32 format; +                    Tegra::DepthFormat format;                      u32 block_dimensions;                      u32 layer_stride; @@ -435,11 +463,21 @@ public:                      };                  } rt_control; -                INSERT_PADDING_WORDS(0x31); +                INSERT_PADDING_WORDS(0x2B); + +                u32 depth_test_enable; + +                INSERT_PADDING_WORDS(0x5);                  u32 independent_blend_enable; -                INSERT_PADDING_WORDS(0x15); +                u32 depth_write_enabled; + +                INSERT_PADDING_WORDS(0x8); + +                ComparisonOp depth_test_func; + +                INSERT_PADDING_WORDS(0xB);                  struct {                      u32 separate_alpha; @@ -540,7 +578,13 @@ public:                      }                  } index_array; -                INSERT_PADDING_WORDS(0xC7); +                INSERT_PADDING_WORDS(0x7); + +                INSERT_PADDING_WORDS(0x46); + +                Cull cull; + +                INSERT_PADDING_WORDS(0x77);                  struct {                      u32 query_address_high; @@ -747,7 +791,10 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);  ASSERT_REG_POSITION(zeta, 0x3F8);  ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);  ASSERT_REG_POSITION(rt_control, 0x487); +ASSERT_REG_POSITION(depth_test_enable, 0x4B3);  ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); +ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); +ASSERT_REG_POSITION(depth_test_func, 0x4C3);  ASSERT_REG_POSITION(blend, 0x4CF);  ASSERT_REG_POSITION(vb_element_base, 0x50D);  ASSERT_REG_POSITION(tsc, 0x557); @@ -755,6 +802,7 @@ ASSERT_REG_POSITION(tic, 0x55D);  ASSERT_REG_POSITION(code_address, 0x582);  ASSERT_REG_POSITION(draw, 0x585);  ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(cull, 0x646);  ASSERT_REG_POSITION(query, 0x6C0);  ASSERT_REG_POSITION(vertex_array[0], 0x700);  ASSERT_REG_POSITION(independent_blend, 0x780); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d0a4ac267..cc5ca656e 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -24,6 +24,15 @@ enum class RenderTargetFormat : u32 {      R11G11B10_FLOAT = 0xE0,  }; +enum class DepthFormat : u32 { +    Z32_FLOAT = 0xA, +    Z16_UNORM = 0x13, +    S8_Z24_UNORM = 0x14, +    Z24_X8_UNORM = 0x15, +    Z24_S8_UNORM = 0x16, +    Z24_C8_UNORM = 0x18, +}; +  /// Returns the number of bytes per pixel of each rendertarget format.  u32 RenderTargetBytesPerPixel(RenderTargetFormat format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 324c28d1b..ca3814cfc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -304,10 +304,15 @@ void RasterizerOpenGL::DrawArrays() {      MICROPROFILE_SCOPE(OpenGL_Drawing);      const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; -    // TODO(bunnei): Implement these +    // Sync the depth test state before configuring the framebuffer surfaces. +    SyncDepthTestState(); + +    // TODO(bunnei): Implement this      const bool has_stencil = false; +      const bool using_color_fb = true; -    const bool using_depth_fb = false; +    const bool using_depth_fb = regs.zeta.Address() != 0; +      const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};      const bool write_color_fb = @@ -338,11 +343,9 @@ void RasterizerOpenGL::DrawArrays() {      // Bind the framebuffer surfaces      BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); -    // Sync the viewport      SyncViewport(surfaces_rect); - -    // Sync the blend state registers      SyncBlendState(); +    SyncCullMode();      // TODO(bunnei): Sync framebuffer_scale uniform here      // TODO(bunnei): Sync scissorbox uniform(s) here @@ -712,7 +715,11 @@ void RasterizerOpenGL::SyncClipCoef() {  }  void RasterizerOpenGL::SyncCullMode() { -    UNREACHABLE(); +    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + +    state.cull.enabled = regs.cull.enabled != 0; +    state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); +    state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);  }  void RasterizerOpenGL::SyncDepthScale() { @@ -723,6 +730,14 @@ void RasterizerOpenGL::SyncDepthOffset() {      UNREACHABLE();  } +void RasterizerOpenGL::SyncDepthTestState() { +    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + +    state.depth.test_enabled = regs.depth_test_enable != 0; +    state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; +    state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); +} +  void RasterizerOpenGL::SyncBlendState() {      const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 621200f03..493aa39e5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -126,6 +126,9 @@ private:      /// Syncs the depth offset to match the guest state      void SyncDepthOffset(); +    /// Syncs the depth test state to match the guest state +    void SyncDepthTestState(); +      /// Syncs the blend state to match the guest state      void SyncBlendState(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9410ddb4e..851ebc263 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -84,22 +84,18 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form       true},                                                                                 // DXT45      {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 + +    // DepthStencil formats +    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, +     false}, // Z24S8  }};  static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { -    const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); -    if (type == SurfaceType::ColorTexture) { -        ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); -        auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; -        ASSERT(component_type == format.component_type); -        return format; -    } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { -        // TODO(Subv): Implement depth formats -        ASSERT_MSG(false, "Unimplemented"); -    } +    ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); +    auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; +    ASSERT(component_type == format.component_type); -    UNREACHABLE(); -    return {}; +    return format;  }  VAddr SurfaceParams::GetCpuAddr() const { @@ -149,11 +145,17 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::      const auto& gpu = Core::System::GetInstance().GPU();      if (morton_to_gl) { -        auto data = Tegra::Texture::UnswizzleTexture( -            *gpu.memory_manager->GpuToCpuAddress(addr), -            SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - -        std::memcpy(gl_buffer, data.data(), data.size()); +        if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { +            auto data = Tegra::Texture::UnswizzleTexture( +                *gpu.memory_manager->GpuToCpuAddress(addr), +                SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); +            std::memcpy(gl_buffer, data.data(), data.size()); +        } else { +            auto data = Tegra::Texture::UnswizzleDepthTexture( +                *gpu.memory_manager->GpuToCpuAddress(addr), +                SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); +            std::memcpy(gl_buffer, data.data(), data.size()); +        }      } else {          // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should          // check the configuration for this and perform more generic un/swizzle @@ -174,7 +176,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),          MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,          MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,          MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>, -        MortonCopy<true, PixelFormat::ASTC_2D_4X4>, +        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,  MortonCopy<true, PixelFormat::Z24S8>,  };  static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), @@ -194,6 +196,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),          nullptr,          nullptr,          MortonCopy<false, PixelFormat::ABGR8>, +        MortonCopy<false, PixelFormat::Z24S8>,  };  // Allocate an uninitialized texture of appropriate size and format for the surface @@ -397,9 +400,15 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(      // get color and depth surfaces      const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(regs.rt[0])}; -    const SurfaceParams depth_params{color_params}; +    SurfaceParams depth_params{color_params}; -    ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); +    if (using_depth_fb) { +        depth_params.addr = regs.zeta.Address(); +        depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(regs.zeta.format); +        depth_params.component_type = SurfaceParams::ComponentTypeFromDepthFormat(regs.zeta.format); +        depth_params.type = SurfaceParams::GetFormatType(depth_params.pixel_format); +        depth_params.size_in_bytes = depth_params.SizeInBytes(); +    }      MathUtil::Rectangle<u32> color_rect{};      Surface color_surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 99be250b4..eea432b0b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -37,7 +37,14 @@ struct SurfaceParams {          DXN1 = 11, // This is also known as BC4          ASTC_2D_4X4 = 12, -        Max, +        MaxColorFormat, + +        // DepthStencil formats +        Z24S8 = 13, + +        MaxDepthStencilFormat, + +        Max = MaxDepthStencilFormat,          Invalid = 255,      }; @@ -84,6 +91,7 @@ struct SurfaceParams {              4, // DXT45              4, // DXN1              4, // ASTC_2D_4X4 +            1, // Z24S8          }};          ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -108,6 +116,7 @@ struct SurfaceParams {              128, // DXT45              64,  // DXN1              32,  // ASTC_2D_4X4 +            32,  // Z24S8          }};          ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -117,6 +126,16 @@ struct SurfaceParams {          return GetFormatBpp(pixel_format);      } +    static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { +        switch (format) { +        case Tegra::DepthFormat::Z24_S8_UNORM: +            return PixelFormat::Z24S8; +        default: +            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); +            UNREACHABLE(); +        } +    } +      static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {          switch (format) {          case Tegra::RenderTargetFormat::RGBA8_UNORM: @@ -205,6 +224,15 @@ struct SurfaceParams {          }      } +    static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { +        switch (format) { +        case PixelFormat::Z24S8: +            return Tegra::DepthFormat::Z24_S8_UNORM; +        default: +            UNREACHABLE(); +        } +    } +      static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {          // TODO(Subv): Implement more component types          switch (type) { @@ -244,11 +272,26 @@ struct SurfaceParams {          }      } +    static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { +        switch (format) { +        case Tegra::DepthFormat::Z24_S8_UNORM: +            return ComponentType::UNorm; +        default: +            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); +            UNREACHABLE(); +        } +    } +      static SurfaceType GetFormatType(PixelFormat pixel_format) { -        if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { +        if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {              return SurfaceType::ColorTexture;          } +        if (static_cast<size_t>(pixel_format) < +            static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { +            return SurfaceType::DepthStencil; +        } +          // TODO(Subv): Implement the other formats          ASSERT(false); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 2155fb019..392041a1c 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -201,4 +201,54 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {      return {};  } +inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { +    switch (comparison) { +    case Maxwell::ComparisonOp::Never: +        return GL_NEVER; +    case Maxwell::ComparisonOp::Less: +        return GL_LESS; +    case Maxwell::ComparisonOp::Equal: +        return GL_EQUAL; +    case Maxwell::ComparisonOp::LessEqual: +        return GL_LEQUAL; +    case Maxwell::ComparisonOp::Greater: +        return GL_GREATER; +    case Maxwell::ComparisonOp::NotEqual: +        return GL_NOTEQUAL; +    case Maxwell::ComparisonOp::GreaterEqual: +        return GL_GEQUAL; +    case Maxwell::ComparisonOp::Always: +        return GL_ALWAYS; +    } +    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); +    UNREACHABLE(); +    return {}; +} + +inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { +    switch (front_face) { +    case Maxwell::Cull::FrontFace::ClockWise: +        return GL_CW; +    case Maxwell::Cull::FrontFace::CounterClockWise: +        return GL_CCW; +    } +    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); +    UNREACHABLE(); +    return {}; +} + +inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { +    switch (cull_face) { +    case Maxwell::Cull::CullFace::Front: +        return GL_FRONT; +    case Maxwell::Cull::CullFace::Back: +        return GL_BACK; +    case Maxwell::Cull::CullFace::FrontAndBack: +        return GL_FRONT_AND_BACK; +    } +    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); +    UNREACHABLE(); +    return {}; +} +  } // namespace MaxwellToGL diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index eaf15da32..680f22ddb 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -5,6 +5,7 @@  #include <cstring>  #include "common/assert.h"  #include "core/memory.h" +#include "video_core/gpu.h"  #include "video_core/textures/decoders.h"  #include "video_core/textures/texture.h" @@ -73,6 +74,16 @@ u32 BytesPerPixel(TextureFormat format) {      }  } +static u32 DepthBytesPerPixel(DepthFormat format) { +    switch (format) { +    case DepthFormat::Z24_S8_UNORM: +        return 4; +    default: +        UNIMPLEMENTED_MSG("Format not implemented"); +        break; +    } +} +  std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,                                   u32 block_height) {      u8* data = Memory::GetPointer(address); @@ -110,6 +121,26 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,      return unswizzled_data;  } +std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, +                                      u32 block_height) { +    u8* data = Memory::GetPointer(address); +    u32 bytes_per_pixel = DepthBytesPerPixel(format); + +    std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); + +    switch (format) { +    case DepthFormat::Z24_S8_UNORM: +        CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, +                         unswizzled_data.data(), true, block_height); +        break; +    default: +        UNIMPLEMENTED_MSG("Format not implemented"); +        break; +    } + +    return unswizzled_data; +} +  std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,                                u32 height) {      std::vector<u8> rgba_data; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 2562c4b06..2b088c077 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -17,6 +17,12 @@ namespace Texture {  std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,                                   u32 block_height = TICEntry::DefaultBlockHeight); +/** + * Unswizzles a swizzled depth texture without changing its format. + */ +std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, +                                      u32 block_height = TICEntry::DefaultBlockHeight); +  /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.  void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel,                        u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height); | 
