diff options
| author | Ameer <aj662@drexel.edu> | 2020-07-08 21:15:49 -0400 | 
|---|---|---|
| committer | Ameer <aj662@drexel.edu> | 2020-07-08 21:15:49 -0400 | 
| commit | 4489ea6f532a501ca4cc379d8d8fb50ce1af27d7 (patch) | |
| tree | 52f62331d39f338f6ae6d0fd56831bdd577943a9 /src/video_core | |
| parent | b57475887be5879347d5fda425676d0bd2e2a3d3 (diff) | |
| parent | 5311b562aa3c619dca9a1a14d0a2b16281a45cc1 (diff) | |
Rebase to master, fix merge conflicts
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 115 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.h | 207 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 89 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 276 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 13 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_state_tracker.h | 50 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/wrapper.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/wrapper.h | 54 | 
13 files changed, 660 insertions, 275 deletions
| diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 424278816..d1f0ea932 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -39,52 +39,18 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {  } // Anonymous namespace -void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { -    raw = 0; -    front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); -    front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); -    front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); -    front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); -    if (regs.stencil_two_side_enable) { -        back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); -        back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); -        back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); -        back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); -    } else { -        back.action_stencil_fail.Assign(front.action_stencil_fail); -        back.action_depth_fail.Assign(front.action_depth_fail); -        back.action_depth_pass.Assign(front.action_depth_pass); -        back.test_func.Assign(front.test_func); -    } -    depth_test_enable.Assign(regs.depth_test_enable); -    depth_write_enable.Assign(regs.depth_write_enabled); -    depth_bounds_enable.Assign(regs.depth_bounds_enable); -    stencil_enable.Assign(regs.stencil_enable); -    depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); -} - -void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { +void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {      const auto& clip = regs.view_volume_clip_control;      const std::array enabled_lut = 
{regs.polygon_offset_point_enable,                                      regs.polygon_offset_line_enable,                                      regs.polygon_offset_fill_enable};      const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); -    u32 packed_front_face = PackFrontFace(regs.front_face); -    if (regs.screen_y_control.triangle_rast_flip != 0) { -        // Flip front face -        packed_front_face = 1 - packed_front_face; -    } -      raw = 0; -    topology.Assign(topology_index);      primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); -    cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);      depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);      depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());      ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); -    cull_face.Assign(PackCullFace(regs.cull_face)); -    front_face.Assign(packed_front_face);      polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front));      patch_control_points_minus_one.Assign(regs.patch_vertices - 1);      tessellation_primitive.Assign(static_cast<u32>(regs.tess_mode.prim.Value())); @@ -93,19 +59,37 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {      logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);      logic_op.Assign(PackLogicOp(regs.logic_op.operation));      rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); +      std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast -} -void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { +    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { +        binding_divisors[index] = +            regs.instanced_arrays.IsInstancingEnabled(index) ? 
regs.vertex_array[index].divisor : 0; +    } + +    for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { +        const auto& input = regs.vertex_attrib_format[index]; +        auto& attribute = attributes[index]; +        attribute.raw = 0; +        attribute.enabled.Assign(input.IsConstant() ? 0 : 1); +        attribute.buffer.Assign(input.buffer); +        attribute.offset.Assign(input.offset); +        attribute.type.Assign(static_cast<u32>(input.type.Value())); +        attribute.size.Assign(static_cast<u32>(input.size.Value())); +    } +      for (std::size_t index = 0; index < std::size(attachments); ++index) {          attachments[index].Fill(regs, index);      } -} -void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept {      const auto& transform = regs.viewport_transform; -    std::transform(transform.begin(), transform.end(), swizzles.begin(), +    std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),                     [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); }); + +    if (!has_extended_dynamic_state) { +        no_extended_dynamic_state.Assign(1); +        dynamic_state.Fill(regs); +    }  }  void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { @@ -147,20 +131,57 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size      enable.Assign(1);  } -void FixedPipelineState::Fill(const Maxwell& regs) { -    rasterizer.Fill(regs); -    depth_stencil.Fill(regs); -    color_blending.Fill(regs); -    viewport_swizzles.Fill(regs); +void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { +    const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); +    u32 packed_front_face = PackFrontFace(regs.front_face); +    if (regs.screen_y_control.triangle_rast_flip != 0) { +        // Flip front face +        packed_front_face = 1 - packed_front_face; +    } + +    raw1 = 
0; +    raw2 = 0; +    front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); +    front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); +    front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); +    front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); +    if (regs.stencil_two_side_enable) { +        back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); +        back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); +        back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); +        back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); +    } else { +        back.action_stencil_fail.Assign(front.action_stencil_fail); +        back.action_depth_fail.Assign(front.action_depth_fail); +        back.action_depth_pass.Assign(front.action_depth_pass); +        back.test_func.Assign(front.test_func); +    } +    stencil_enable.Assign(regs.stencil_enable); +    depth_write_enable.Assign(regs.depth_write_enabled); +    depth_bounds_enable.Assign(regs.depth_bounds_enable); +    depth_test_enable.Assign(regs.depth_test_enable); +    front_face.Assign(packed_front_face); +    depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); +    topology.Assign(topology_index); +    cull_face.Assign(PackCullFace(regs.cull_face)); +    cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); + +    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { +        const auto& input = regs.vertex_array[index]; +        VertexBinding& binding = vertex_bindings[index]; +        binding.raw = 0; +        binding.enabled.Assign(input.IsEnabled() ? 
1 : 0); +        binding.stride.Assign(static_cast<u16>(input.stride.Value())); +    }  }  std::size_t FixedPipelineState::Hash() const noexcept { -    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); +    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());      return static_cast<std::size_t>(hash);  }  bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { -    return std::memcmp(this, &rhs, sizeof *this) == 0; +    return std::memcmp(this, &rhs, Size()) == 0;  }  u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31a6398f2..cdcbb65f5 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,14 +60,6 @@ struct FixedPipelineState {          void Fill(const Maxwell& regs, std::size_t index); -        std::size_t Hash() const noexcept; - -        bool operator==(const BlendingAttachment& rhs) const noexcept; - -        bool operator!=(const BlendingAttachment& rhs) const noexcept { -            return !operator==(rhs); -        } -          constexpr std::array<bool, 4> Mask() const noexcept {              return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};          } @@ -97,156 +89,116 @@ struct FixedPipelineState {          }      }; -    struct VertexInput { -        union Binding { -            u16 raw; -            BitField<0, 1, u16> enabled; -            BitField<1, 12, u16> stride; -        }; +    union VertexAttribute { +        u32 raw; +        BitField<0, 1, u32> enabled; +        BitField<1, 5, u32> buffer; +        BitField<6, 14, u32> offset; +        BitField<20, 3, u32> type; +        BitField<23, 6, u32> size; -        union Attribute { -            u32 raw; -            BitField<0, 1, u32> enabled; -            
BitField<1, 5, u32> buffer; -            BitField<6, 14, u32> offset; -            BitField<20, 3, u32> type; -            BitField<23, 6, u32> size; - -            constexpr Maxwell::VertexAttribute::Type Type() const noexcept { -                return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); -            } - -            constexpr Maxwell::VertexAttribute::Size Size() const noexcept { -                return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); -            } -        }; - -        std::array<Binding, Maxwell::NumVertexArrays> bindings; -        std::array<u32, Maxwell::NumVertexArrays> binding_divisors; -        std::array<Attribute, Maxwell::NumVertexAttributes> attributes; - -        void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { -            auto& binding = bindings[index]; -            binding.raw = 0; -            binding.enabled.Assign(enabled ? 1 : 0); -            binding.stride.Assign(static_cast<u16>(stride)); -            binding_divisors[index] = divisor; +        constexpr Maxwell::VertexAttribute::Type Type() const noexcept { +            return static_cast<Maxwell::VertexAttribute::Type>(type.Value());          } -        void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, -                          Maxwell::VertexAttribute::Type type, -                          Maxwell::VertexAttribute::Size size) noexcept { -            auto& attribute = attributes[index]; -            attribute.raw = 0; -            attribute.enabled.Assign(enabled ? 
1 : 0); -            attribute.buffer.Assign(buffer); -            attribute.offset.Assign(offset); -            attribute.type.Assign(static_cast<u32>(type)); -            attribute.size.Assign(static_cast<u32>(size)); +        constexpr Maxwell::VertexAttribute::Size Size() const noexcept { +            return static_cast<Maxwell::VertexAttribute::Size>(size.Value());          }      }; -    struct Rasterizer { -        union { -            u32 raw; -            BitField<0, 4, u32> topology; -            BitField<4, 1, u32> primitive_restart_enable; -            BitField<5, 1, u32> cull_enable; -            BitField<6, 1, u32> depth_bias_enable; -            BitField<7, 1, u32> depth_clamp_disabled; -            BitField<8, 1, u32> ndc_minus_one_to_one; -            BitField<9, 2, u32> cull_face; -            BitField<11, 1, u32> front_face; -            BitField<12, 2, u32> polygon_mode; -            BitField<14, 5, u32> patch_control_points_minus_one; -            BitField<19, 2, u32> tessellation_primitive; -            BitField<21, 2, u32> tessellation_spacing; -            BitField<23, 1, u32> tessellation_clockwise; -            BitField<24, 1, u32> logic_op_enable; -            BitField<25, 4, u32> logic_op; -            BitField<29, 1, u32> rasterize_enable; -        }; - -        // TODO(Rodrigo): Move this to push constants -        u32 point_size; +    template <std::size_t Position> +    union StencilFace { +        BitField<Position + 0, 3, u32> action_stencil_fail; +        BitField<Position + 3, 3, u32> action_depth_fail; +        BitField<Position + 6, 3, u32> action_depth_pass; +        BitField<Position + 9, 3, u32> test_func; -        void Fill(const Maxwell& regs) noexcept; +        Maxwell::StencilOp ActionStencilFail() const noexcept { +            return UnpackStencilOp(action_stencil_fail); +        } -        constexpr Maxwell::PrimitiveTopology Topology() const noexcept { -            return 
static_cast<Maxwell::PrimitiveTopology>(topology.Value()); +        Maxwell::StencilOp ActionDepthFail() const noexcept { +            return UnpackStencilOp(action_depth_fail);          } -        Maxwell::CullFace CullFace() const noexcept { -            return UnpackCullFace(cull_face.Value()); +        Maxwell::StencilOp ActionDepthPass() const noexcept { +            return UnpackStencilOp(action_depth_pass);          } -        Maxwell::FrontFace FrontFace() const noexcept { -            return UnpackFrontFace(front_face.Value()); +        Maxwell::ComparisonOp TestFunc() const noexcept { +            return UnpackComparisonOp(test_func);          }      }; -    struct DepthStencil { -        template <std::size_t Position> -        union StencilFace { -            BitField<Position + 0, 3, u32> action_stencil_fail; -            BitField<Position + 3, 3, u32> action_depth_fail; -            BitField<Position + 6, 3, u32> action_depth_pass; -            BitField<Position + 9, 3, u32> test_func; - -            Maxwell::StencilOp ActionStencilFail() const noexcept { -                return UnpackStencilOp(action_stencil_fail); -            } - -            Maxwell::StencilOp ActionDepthFail() const noexcept { -                return UnpackStencilOp(action_depth_fail); -            } - -            Maxwell::StencilOp ActionDepthPass() const noexcept { -                return UnpackStencilOp(action_depth_pass); -            } - -            Maxwell::ComparisonOp TestFunc() const noexcept { -                return UnpackComparisonOp(test_func); -            } -        }; +    union VertexBinding { +        u16 raw; +        BitField<0, 12, u16> stride; +        BitField<12, 1, u16> enabled; +    }; +    struct DynamicState {          union { -            u32 raw; +            u32 raw1;              StencilFace<0> front;              StencilFace<12> back; -            BitField<24, 1, u32> depth_test_enable; +            BitField<24, 1, u32> stencil_enable;           
   BitField<25, 1, u32> depth_write_enable;              BitField<26, 1, u32> depth_bounds_enable; -            BitField<27, 1, u32> stencil_enable; -            BitField<28, 3, u32> depth_test_func; +            BitField<27, 1, u32> depth_test_enable; +            BitField<28, 1, u32> front_face; +            BitField<29, 3, u32> depth_test_func; +        }; +        union { +            u32 raw2; +            BitField<0, 4, u32> topology; +            BitField<4, 2, u32> cull_face; +            BitField<6, 1, u32> cull_enable;          }; +        std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; -        void Fill(const Maxwell& regs) noexcept; +        void Fill(const Maxwell& regs);          Maxwell::ComparisonOp DepthTestFunc() const noexcept {              return UnpackComparisonOp(depth_test_func);          } -    }; - -    struct ColorBlending { -        std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; -        void Fill(const Maxwell& regs) noexcept; -    }; +        Maxwell::CullFace CullFace() const noexcept { +            return UnpackCullFace(cull_face.Value()); +        } -    struct ViewportSwizzles { -        std::array<u16, Maxwell::NumViewports> swizzles; +        Maxwell::FrontFace FrontFace() const noexcept { +            return UnpackFrontFace(front_face.Value()); +        } -        void Fill(const Maxwell& regs) noexcept; +        constexpr Maxwell::PrimitiveTopology Topology() const noexcept { +            return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); +        }      }; -    VertexInput vertex_input; -    Rasterizer rasterizer; -    DepthStencil depth_stencil; -    ColorBlending color_blending; -    ViewportSwizzles viewport_swizzles; +    union { +        u32 raw; +        BitField<0, 1, u32> no_extended_dynamic_state; +        BitField<2, 1, u32> primitive_restart_enable; +        BitField<3, 1, u32> depth_bias_enable; +        BitField<4, 1, u32> depth_clamp_disabled; +        
BitField<5, 1, u32> ndc_minus_one_to_one; +        BitField<6, 2, u32> polygon_mode; +        BitField<8, 5, u32> patch_control_points_minus_one; +        BitField<13, 2, u32> tessellation_primitive; +        BitField<15, 2, u32> tessellation_spacing; +        BitField<17, 1, u32> tessellation_clockwise; +        BitField<18, 1, u32> logic_op_enable; +        BitField<19, 4, u32> logic_op; +        BitField<23, 1, u32> rasterize_enable; +    }; +    u32 point_size; +    std::array<u32, Maxwell::NumVertexArrays> binding_divisors; +    std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; +    std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; +    std::array<u16, Maxwell::NumViewports> viewport_swizzles; +    DynamicState dynamic_state; -    void Fill(const Maxwell& regs); +    void Fill(const Maxwell& regs, bool has_extended_dynamic_state);      std::size_t Hash() const noexcept; @@ -255,6 +207,11 @@ struct FixedPipelineState {      bool operator!=(const FixedPipelineState& rhs) const noexcept {          return !operator==(rhs);      } + +    std::size_t Size() const noexcept { +        const std::size_t total_size = sizeof *this; +        return total_size - (no_extended_dynamic_state != 0 ? 
0 : sizeof(DynamicState)); +    }  };  static_assert(std::has_unique_object_representations_v<FixedPipelineState>);  static_assert(std::is_trivially_copyable_v<FixedPipelineState>); diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9fd8ac3f6..fdaea4210 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -313,6 +313,16 @@ bool VKDevice::Create() {          LOG_INFO(Render_Vulkan, "Device doesn't support custom border colors");      } +    VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; +    if (ext_extended_dynamic_state) { +        dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; +        dynamic_state.pNext = nullptr; +        dynamic_state.extendedDynamicState = VK_TRUE; +        SetNext(next, dynamic_state); +    } else { +        LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); +    } +      if (!ext_depth_range_unrestricted) {          LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");      } @@ -541,6 +551,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {      bool has_ext_subgroup_size_control{};      bool has_ext_transform_feedback{};      bool has_ext_custom_border_color{}; +    bool has_ext_extended_dynamic_state{};      for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {          Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);          Test(extension, khr_uniform_buffer_standard_layout, @@ -558,6 +569,8 @@ std::vector<const char*> VKDevice::LoadExtensions() {               false);          Test(extension, has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME,               false); +        Test(extension, has_ext_extended_dynamic_state, +             VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);          if 
(Settings::values.renderer_debug) {              Test(extension, nv_device_diagnostics_config,                   VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -643,6 +656,19 @@ std::vector<const char*> VKDevice::LoadExtensions() {          }      } +    if (has_ext_extended_dynamic_state) { +        VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; +        dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; +        dynamic_state.pNext = nullptr; +        features.pNext = &dynamic_state; +        physical.GetFeatures2KHR(features); + +        if (dynamic_state.extendedDynamicState) { +            extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); +            ext_extended_dynamic_state = true; +        } +    } +      return extensions;  } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6b9227b09..ae5c21baa 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -182,6 +182,11 @@ public:          return ext_custom_border_color;      } +    /// Returns true if the device supports VK_EXT_extended_dynamic_state. +    bool IsExtExtendedDynamicStateSupported() const { +        return ext_extended_dynamic_state; +    } +      /// Returns the vendor name reported from Vulkan.      std::string_view GetVendorName() const {          return vendor_name; @@ -239,6 +244,7 @@ private:      bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.      bool ext_transform_feedback{};             ///< Support for VK_EXT_transform_feedback.      bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color. +    bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.      bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.      
// Telemetry parameters diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 69b6bba00..844445105 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -176,20 +176,32 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(  vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,                                                  const SPIRVProgram& program) const { -    const auto& vi = fixed_state.vertex_input; -    const auto& ds = fixed_state.depth_stencil; -    const auto& cd = fixed_state.color_blending; -    const auto& rs = fixed_state.rasterizer; -    const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles; +    const auto& state = fixed_state; +    const auto& viewport_swizzles = state.viewport_swizzles; + +    FixedPipelineState::DynamicState dynamic; +    if (device.IsExtExtendedDynamicStateSupported()) { +        // Insert dummy values, as long as they are valid they don't matter as extended dynamic +        // state is ignored +        dynamic.raw1 = 0; +        dynamic.raw2 = 0; +        for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { +            // Enable all vertex bindings +            binding.raw = 0; +            binding.enabled.Assign(1); +        } +    } else { +        dynamic = state.dynamic_state; +    }      std::vector<VkVertexInputBindingDescription> vertex_bindings;      std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; -    for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { -        const auto& binding = vi.bindings[index]; +    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { +        const auto& binding = dynamic.vertex_bindings[index];          if (!binding.enabled) {              continue;          } -      
  const bool instanced = vi.binding_divisors[index] != 0; +        const bool instanced = state.binding_divisors[index] != 0;          const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;          auto& vertex_binding = vertex_bindings.emplace_back(); @@ -200,14 +212,14 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa          if (instanced) {              auto& binding_divisor = vertex_binding_divisors.emplace_back();              binding_divisor.binding = static_cast<u32>(index); -            binding_divisor.divisor = vi.binding_divisors[index]; +            binding_divisor.divisor = state.binding_divisors[index];          }      }      std::vector<VkVertexInputAttributeDescription> vertex_attributes;      const auto& input_attributes = program[0]->entries.attributes; -    for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { -        const auto& attribute = vi.attributes[index]; +    for (std::size_t index = 0; index < state.attributes.size(); ++index) { +        const auto& attribute = state.attributes[index];          if (!attribute.enabled) {              continue;          } @@ -244,15 +256,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa      input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;      input_assembly_ci.pNext = nullptr;      input_assembly_ci.flags = 0; -    input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); +    input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());      input_assembly_ci.primitiveRestartEnable = -        rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); +        state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology);      VkPipelineTessellationStateCreateInfo tessellation_ci;      tessellation_ci.sType = 
VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;      tessellation_ci.pNext = nullptr;      tessellation_ci.flags = 0; -    tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; +    tessellation_ci.patchControlPoints = state.patch_control_points_minus_one.Value() + 1;      VkPipelineViewportStateCreateInfo viewport_ci;      viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -280,13 +292,13 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa      rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;      rasterization_ci.pNext = nullptr;      rasterization_ci.flags = 0; -    rasterization_ci.depthClampEnable = rs.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; -    rasterization_ci.rasterizerDiscardEnable = rs.rasterize_enable == 0 ? VK_TRUE : VK_FALSE; +    rasterization_ci.depthClampEnable = state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE; +    rasterization_ci.rasterizerDiscardEnable = state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE;      rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;      rasterization_ci.cullMode = -        rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; -    rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); -    rasterization_ci.depthBiasEnable = rs.depth_bias_enable; +        dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE; +    rasterization_ci.frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()); +    rasterization_ci.depthBiasEnable = state.depth_bias_enable;      rasterization_ci.depthBiasConstantFactor = 0.0f;      rasterization_ci.depthBiasClamp = 0.0f;      rasterization_ci.depthBiasSlopeFactor = 0.0f; @@ -307,14 +319,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa      depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;      depth_stencil_ci.pNext = nullptr;      depth_stencil_ci.flags = 0; -    depth_stencil_ci.depthTestEnable = ds.depth_test_enable; -    depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; -    depth_stencil_ci.depthCompareOp = -        ds.depth_test_enable ? MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; -    depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; -    depth_stencil_ci.stencilTestEnable = ds.stencil_enable; -    depth_stencil_ci.front = GetStencilFaceState(ds.front); -    depth_stencil_ci.back = GetStencilFaceState(ds.back); +    depth_stencil_ci.depthTestEnable = dynamic.depth_test_enable; +    depth_stencil_ci.depthWriteEnable = dynamic.depth_write_enable; +    depth_stencil_ci.depthCompareOp = dynamic.depth_test_enable +                                          ? 
MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) +                                          : VK_COMPARE_OP_ALWAYS; +    depth_stencil_ci.depthBoundsTestEnable = dynamic.depth_bounds_enable; +    depth_stencil_ci.stencilTestEnable = dynamic.stencil_enable; +    depth_stencil_ci.front = GetStencilFaceState(dynamic.front); +    depth_stencil_ci.back = GetStencilFaceState(dynamic.back);      depth_stencil_ci.minDepthBounds = 0.0f;      depth_stencil_ci.maxDepthBounds = 0.0f; @@ -324,7 +337,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa          static constexpr std::array COMPONENT_TABLE = {              VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,              VK_COLOR_COMPONENT_A_BIT}; -        const auto& blend = cd.attachments[index]; +        const auto& blend = state.attachments[index];          VkColorComponentFlags color_components = 0;          for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { @@ -354,11 +367,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa      color_blend_ci.pAttachments = cb_attachments.data();      std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); -    static constexpr std::array dynamic_states = { +    std::vector dynamic_states = {          VK_DYNAMIC_STATE_VIEWPORT,           VK_DYNAMIC_STATE_SCISSOR,          VK_DYNAMIC_STATE_DEPTH_BIAS,         VK_DYNAMIC_STATE_BLEND_CONSTANTS,          VK_DYNAMIC_STATE_DEPTH_BOUNDS,       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, -        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; +        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, +    }; +    if (device.IsExtExtendedDynamicStateSupported()) { +        static constexpr std::array extended = { +            VK_DYNAMIC_STATE_CULL_MODE_EXT, +            VK_DYNAMIC_STATE_FRONT_FACE_EXT, +            VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, +  
          VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, +            VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, +            VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, +            VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, +            VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, +            VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, +            VK_DYNAMIC_STATE_STENCIL_OP_EXT, +        }; +        dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); +    }      VkPipelineDynamicStateCreateInfo dynamic_state_ci;      dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ea66e621e..3da835324 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -116,12 +116,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,  } // Anonymous namespace  std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { -    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); +    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());      return static_cast<std::size_t>(hash);  }  bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { -    return std::memcmp(&rhs, this, sizeof *this) == 0; +    return std::memcmp(&rhs, this, Size()) == 0;  }  std::size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -312,18 +312,19 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {      const auto& gpu = system.GPU().Maxwell3D();      Specialization specialization; -    if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { +    if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || +        device.IsExtExtendedDynamicStateSupported()) {          float point_size; -    
    std::memcpy(&point_size, &fixed_state.rasterizer.point_size, sizeof(float)); +        std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));          specialization.point_size = point_size;          ASSERT(point_size != 0.0f);      }      for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { -        const auto& attribute = fixed_state.vertex_input.attributes[i]; +        const auto& attribute = fixed_state.attributes[i];          specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;          specialization.attribute_types[i] = attribute.Type();      } -    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; +    specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;      SPIRVProgram program;      std::vector<VkDescriptorSetLayoutBinding> bindings; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a36e5112..0a3fe65fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -44,10 +44,10 @@ class VKUpdateDescriptorQueue;  using Maxwell = Tegra::Engines::Maxwell3D::Regs;  struct GraphicsPipelineCacheKey { -    FixedPipelineState fixed_state;      RenderPassParams renderpass_params; +    u32 padding;      std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; -    u64 padding; // This is necessary for unique object representations +    FixedPipelineState fixed_state;      std::size_t Hash() const noexcept; @@ -56,6 +56,10 @@ struct GraphicsPipelineCacheKey {      bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {          return !operator==(rhs);      } + +    std::size_t Size() const noexcept { +        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); +    }  };  static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);  
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a8d94eac3..380ed532b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -186,13 +186,22 @@ bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {             scissor.max_y < regs.zeta_height;  } +template <std::size_t N> +std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { +    std::array<VkDeviceSize, N> expanded; +    std::copy(strides.begin(), strides.end(), expanded.begin()); +    return expanded; +} +  } // Anonymous namespace  class BufferBindings final {  public: -    void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { +    void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) {          vertex.buffers[vertex.num_buffers] = buffer;          vertex.offsets[vertex.num_buffers] = offset; +        vertex.sizes[vertex.num_buffers] = size; +        vertex.strides[vertex.num_buffers] = static_cast<u16>(stride);          ++vertex.num_buffers;      } @@ -202,76 +211,76 @@ public:          index.type = type;      } -    void Bind(VKScheduler& scheduler) const { +    void Bind(const VKDevice& device, VKScheduler& scheduler) const {          // Use this large switch case to avoid dispatching more memory in the record lambda than          // what we need. It looks horrible, but it's the best we can do on standard C++.          
switch (vertex.num_buffers) {          case 0: -            return BindStatic<0>(scheduler); +            return BindStatic<0>(device, scheduler);          case 1: -            return BindStatic<1>(scheduler); +            return BindStatic<1>(device, scheduler);          case 2: -            return BindStatic<2>(scheduler); +            return BindStatic<2>(device, scheduler);          case 3: -            return BindStatic<3>(scheduler); +            return BindStatic<3>(device, scheduler);          case 4: -            return BindStatic<4>(scheduler); +            return BindStatic<4>(device, scheduler);          case 5: -            return BindStatic<5>(scheduler); +            return BindStatic<5>(device, scheduler);          case 6: -            return BindStatic<6>(scheduler); +            return BindStatic<6>(device, scheduler);          case 7: -            return BindStatic<7>(scheduler); +            return BindStatic<7>(device, scheduler);          case 8: -            return BindStatic<8>(scheduler); +            return BindStatic<8>(device, scheduler);          case 9: -            return BindStatic<9>(scheduler); +            return BindStatic<9>(device, scheduler);          case 10: -            return BindStatic<10>(scheduler); +            return BindStatic<10>(device, scheduler);          case 11: -            return BindStatic<11>(scheduler); +            return BindStatic<11>(device, scheduler);          case 12: -            return BindStatic<12>(scheduler); +            return BindStatic<12>(device, scheduler);          case 13: -            return BindStatic<13>(scheduler); +            return BindStatic<13>(device, scheduler);          case 14: -            return BindStatic<14>(scheduler); +            return BindStatic<14>(device, scheduler);          case 15: -            return BindStatic<15>(scheduler); +            return BindStatic<15>(device, scheduler);          case 16: -            return BindStatic<16>(scheduler); +            
return BindStatic<16>(device, scheduler);          case 17: -            return BindStatic<17>(scheduler); +            return BindStatic<17>(device, scheduler);          case 18: -            return BindStatic<18>(scheduler); +            return BindStatic<18>(device, scheduler);          case 19: -            return BindStatic<19>(scheduler); +            return BindStatic<19>(device, scheduler);          case 20: -            return BindStatic<20>(scheduler); +            return BindStatic<20>(device, scheduler);          case 21: -            return BindStatic<21>(scheduler); +            return BindStatic<21>(device, scheduler);          case 22: -            return BindStatic<22>(scheduler); +            return BindStatic<22>(device, scheduler);          case 23: -            return BindStatic<23>(scheduler); +            return BindStatic<23>(device, scheduler);          case 24: -            return BindStatic<24>(scheduler); +            return BindStatic<24>(device, scheduler);          case 25: -            return BindStatic<25>(scheduler); +            return BindStatic<25>(device, scheduler);          case 26: -            return BindStatic<26>(scheduler); +            return BindStatic<26>(device, scheduler);          case 27: -            return BindStatic<27>(scheduler); +            return BindStatic<27>(device, scheduler);          case 28: -            return BindStatic<28>(scheduler); +            return BindStatic<28>(device, scheduler);          case 29: -            return BindStatic<29>(scheduler); +            return BindStatic<29>(device, scheduler);          case 30: -            return BindStatic<30>(scheduler); +            return BindStatic<30>(device, scheduler);          case 31: -            return BindStatic<31>(scheduler); +            return BindStatic<31>(device, scheduler);          case 32: -            return BindStatic<32>(scheduler); +            return BindStatic<32>(device, scheduler);          }          UNREACHABLE();    
  } @@ -282,6 +291,8 @@ private:          std::size_t num_buffers = 0;          std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;          std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; +        std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; +        std::array<u16, Maxwell::NumVertexArrays> strides;      } vertex;      struct { @@ -291,15 +302,23 @@ private:      } index;      template <std::size_t N> -    void BindStatic(VKScheduler& scheduler) const { -        if (index.buffer) { -            BindStatic<N, true>(scheduler); +    void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { +        if (device.IsExtExtendedDynamicStateSupported()) { +            if (index.buffer) { +                BindStatic<N, true, true>(scheduler); +            } else { +                BindStatic<N, false, true>(scheduler); +            }          } else { -            BindStatic<N, false>(scheduler); +            if (index.buffer) { +                BindStatic<N, true, false>(scheduler); +            } else { +                BindStatic<N, false, false>(scheduler); +            }          }      } -    template <std::size_t N, bool is_indexed> +    template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state>      void BindStatic(VKScheduler& scheduler) const {          static_assert(N <= Maxwell::NumVertexArrays);          if constexpr (N == 0) { @@ -311,6 +330,31 @@ private:          std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());          std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); +        if constexpr (has_extended_dynamic_state) { +            // With extended dynamic states we can specify the length and stride of a vertex buffer +            // std::array<VkDeviceSize, N> sizes; +            std::array<u16, N> strides; +            // std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); +            
std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); + +            if constexpr (is_indexed) { +                scheduler.Record( +                    [buffers, offsets, strides, index = index](vk::CommandBuffer cmdbuf) { +                        cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); +                        cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), +                                                     offsets.data(), nullptr, +                                                     ExpandStrides(strides).data()); +                    }); +            } else { +                scheduler.Record([buffers, offsets, strides](vk::CommandBuffer cmdbuf) { +                    cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), +                                                 offsets.data(), nullptr, +                                                 ExpandStrides(strides).data()); +                }); +            } +            return; +        } +          if constexpr (is_indexed) {              // Indexed draw              scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { @@ -369,7 +413,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      const auto& gpu = system.GPU().Maxwell3D();      GraphicsPipelineCacheKey key; -    key.fixed_state.Fill(gpu.regs); +    key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());      buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -402,7 +446,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {      UpdateDynamicStates(); -    buffer_bindings.Bind(scheduler); +    buffer_bindings.Bind(device, scheduler);      BeginTransformFeedback(); @@ -822,7 +866,7 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt      const auto& gpu = system.GPU().Maxwell3D();      const auto& regs = gpu.regs; -    
SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); +    SetupVertexArrays(buffer_bindings);      const u32 base_instance = regs.vb_base_instance;      const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; @@ -893,6 +937,17 @@ void RasterizerVulkan::UpdateDynamicStates() {      UpdateBlendConstants(regs);      UpdateDepthBounds(regs);      UpdateStencilFaces(regs); +    if (device.IsExtExtendedDynamicStateSupported()) { +        UpdateCullMode(regs); +        UpdateDepthBoundsTestEnable(regs); +        UpdateDepthTestEnable(regs); +        UpdateDepthWriteEnable(regs); +        UpdateDepthCompareOp(regs); +        UpdateFrontFace(regs); +        UpdatePrimitiveTopology(regs); +        UpdateStencilOp(regs); +        UpdateStencilTestEnable(regs); +    }  }  void RasterizerVulkan::BeginTransformFeedback() { @@ -940,41 +995,25 @@ void RasterizerVulkan::EndTransformFeedback() {          [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });  } -void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, -                                         BufferBindings& buffer_bindings) { +void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {      const auto& regs = system.GPU().Maxwell3D().regs; -    for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { -        const auto& attrib = regs.vertex_attrib_format[index]; -        if (attrib.IsConstant()) { -            vertex_input.SetAttribute(index, false, 0, 0, {}, {}); -            continue; -        } -        vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), -                                  attrib.size.Value()); -    } -      for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {          const auto& vertex_array = regs.vertex_array[index];          if (!vertex_array.IsEnabled()) { -            vertex_input.SetBinding(index, 
false, 0, 0);              continue;          } -        vertex_input.SetBinding( -            index, true, vertex_array.stride, -            regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); -          const GPUVAddr start{vertex_array.StartAddress()};          const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};          ASSERT(end >= start); -        const std::size_t size{end - start}; +        const std::size_t size = end - start;          if (size == 0) { -            buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); +            buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);              continue;          }          const auto info = buffer_cache.UploadMemory(start, size); -        buffer_bindings.AddVertexBinding(info.handle, info.offset); +        buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride);      }  } @@ -1326,6 +1365,117 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)      }  } +void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchCullMode()) { +        return; +    } +    scheduler.Record( +        [enabled = regs.cull_test_enabled, cull_face = regs.cull_face](vk::CommandBuffer cmdbuf) { +            cmdbuf.SetCullModeEXT(enabled ? 
MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE); +        }); +} + +void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchDepthBoundsTestEnable()) { +        return; +    } +    scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetDepthBoundsTestEnableEXT(enable); +    }); +} + +void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchDepthTestEnable()) { +        return; +    } +    scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetDepthTestEnableEXT(enable); +    }); +} + +void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchDepthWriteEnable()) { +        return; +    } +    scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetDepthWriteEnableEXT(enable); +    }); +} + +void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchDepthCompareOp()) { +        return; +    } +    scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func)); +    }); +} + +void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchFrontFace()) { +        return; +    } + +    VkFrontFace front_face = MaxwellToVK::FrontFace(regs.front_face); +    if (regs.screen_y_control.triangle_rast_flip != 0) { +        front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? 
VK_FRONT_FACE_COUNTER_CLOCKWISE +                                                           : VK_FRONT_FACE_CLOCKWISE; +    } +    scheduler.Record( +        [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); +} + +void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchPrimitiveTopology()) { +        return; +    } +    const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); +    scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); +    }); +} +
+void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchStencilOp()) { +        return; +    } +    const Maxwell::StencilOp fail = regs.stencil_front_op_fail; +    const Maxwell::StencilOp zfail = regs.stencil_front_op_zfail; +    const Maxwell::StencilOp zpass = regs.stencil_front_op_zpass; +    const Maxwell::ComparisonOp compare = regs.stencil_front_func_func; +    if (regs.stencil_two_side_enable) { // Two-sided stencil: each face uses its own registers +        const Maxwell::StencilOp back_fail = regs.stencil_back_op_fail; +        const Maxwell::StencilOp back_zfail = regs.stencil_back_op_zfail; +        const Maxwell::StencilOp back_zpass = regs.stencil_back_op_zpass; +        const Maxwell::ComparisonOp back_compare = regs.stencil_back_func_func; +        scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass, +                          back_compare](vk::CommandBuffer cmdbuf) { +            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail), +                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), +                                   MaxwellToVK::ComparisonOp(compare)); +            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail), +                                   MaxwellToVK::StencilOp(back_zpass), +                                   MaxwellToVK::StencilOp(back_zfail), +                                   MaxwellToVK::ComparisonOp(back_compare)); +        }); +    } else { // Single-sided stencil: the front registers define the ops of both faces +        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) { +            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail), +                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail), +                                   MaxwellToVK::ComparisonOp(compare)); +        }); +    } +} + +void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) { +    if (!state_tracker.TouchStencilTestEnable()) { +        return; +    } +    scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) { +        cmdbuf.SetStencilTestEnableEXT(enable); +    }); +} +  std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {      std::size_t size = CalculateVertexArraysSize();      if (is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 83e00e7e9..923178b0b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -185,8 +185,7 @@ private:      bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); -    void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, -                           BufferBindings& buffer_bindings); +    void SetupVertexArrays(BufferBindings& buffer_bindings);      void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); @@ -246,6 +245,16 @@ private:      void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);      void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); +    void 
UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); +    void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); +      std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;      std::size_t CalculateComputeStreamBufferSize() const; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 94a89e388..e5a583dd5 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -36,6 +36,15 @@ Flags MakeInvalidationFlags() {      flags[BlendConstants] = true;      flags[DepthBounds] = true;      flags[StencilProperties] = true; +    flags[CullMode] = true; +    flags[DepthBoundsEnable] = true; +    flags[DepthTestEnable] = true; +    flags[DepthWriteEnable] = true; +    flags[DepthCompareOp] = true; +    flags[FrontFace] = true; +    flags[PrimitiveTopology] = true; +    flags[StencilOp] = true; +    flags[StencilTestEnable] = true;      return flags;  } @@ -75,6 +84,57 @@ void SetupDirtyStencilProperties(Tables& tables) {      table[OFF(stencil_back_func_mask)] = StencilProperties;  } +void SetupDirtyCullMode(Tables& tables) { +    auto& table = tables[0]; +    table[OFF(cull_face)] = CullMode; +    table[OFF(cull_test_enabled)] = CullMode; +} + +void SetupDirtyDepthBoundsEnable(Tables& tables) { +    tables[0][OFF(depth_bounds_enable)] = DepthBoundsEnable; +} + +void SetupDirtyDepthTestEnable(Tables& tables) { +    tables[0][OFF(depth_test_enable)] = DepthTestEnable; +} + +void 
SetupDirtyDepthWriteEnable(Tables& tables) { +    tables[0][OFF(depth_write_enabled)] = DepthWriteEnable; +} + +void SetupDirtyDepthCompareOp(Tables& tables) { +    tables[0][OFF(depth_test_func)] = DepthCompareOp; +} + +void SetupDirtyFrontFace(Tables& tables) { +    auto& table = tables[0]; +    table[OFF(front_face)] = FrontFace; +    table[OFF(screen_y_control)] = FrontFace; +} + +void SetupDirtyPrimitiveTopology(Tables& tables) { +    tables[0][OFF(draw.topology)] = PrimitiveTopology; +} + +void SetupDirtyStencilOp(Tables& tables) { +    auto& table = tables[0]; +    table[OFF(stencil_front_op_fail)] = StencilOp; +    table[OFF(stencil_front_op_zfail)] = StencilOp; +    table[OFF(stencil_front_op_zpass)] = StencilOp; +    table[OFF(stencil_front_func_func)] = StencilOp; +    table[OFF(stencil_back_op_fail)] = StencilOp; +    table[OFF(stencil_back_op_zfail)] = StencilOp; +    table[OFF(stencil_back_op_zpass)] = StencilOp; +    table[OFF(stencil_back_func_func)] = StencilOp; + +    // Table 0 is used by StencilProperties +    tables[1][OFF(stencil_two_side_enable)] = StencilOp; +} + +void SetupDirtyStencilTestEnable(Tables& tables) { +    tables[0][OFF(stencil_enable)] = StencilTestEnable; +} +  } // Anonymous namespace  StateTracker::StateTracker(Core::System& system) @@ -90,6 +150,14 @@ void StateTracker::Initialize() {      SetupDirtyBlendConstants(tables);      SetupDirtyDepthBounds(tables);      SetupDirtyStencilProperties(tables); +    SetupDirtyCullMode(tables); +    SetupDirtyDepthBoundsEnable(tables); +    SetupDirtyDepthTestEnable(tables); +    SetupDirtyDepthWriteEnable(tables); +    SetupDirtyDepthCompareOp(tables); +    SetupDirtyFrontFace(tables); +    SetupDirtyPrimitiveTopology(tables); +    SetupDirtyStencilOp(tables);  }  void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 03bc415b2..54ca0d6c6 100644 --- 
a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -26,6 +26,16 @@ enum : u8 {      DepthBounds,      StencilProperties, +    CullMode, +    DepthBoundsEnable, +    DepthTestEnable, +    DepthWriteEnable, +    DepthCompareOp, +    FrontFace, +    PrimitiveTopology, +    StencilOp, +    StencilTestEnable, +      Last  };  static_assert(Last <= std::numeric_limits<u8>::max()); @@ -64,6 +74,46 @@ public:          return Exchange(Dirty::StencilProperties, false);      } +    bool TouchCullMode() { +        return Exchange(Dirty::CullMode, false); +    } + +    bool TouchDepthBoundsTestEnable() { +        return Exchange(Dirty::DepthBoundsEnable, false); +    } + +    bool TouchDepthTestEnable() { +        return Exchange(Dirty::DepthTestEnable, false); +    } + +    bool TouchDepthBoundsEnable() { +        return Exchange(Dirty::DepthBoundsEnable, false); +    } + +    bool TouchDepthWriteEnable() { +        return Exchange(Dirty::DepthWriteEnable, false); +    } + +    bool TouchDepthCompareOp() { +        return Exchange(Dirty::DepthCompareOp, false); +    } + +    bool TouchFrontFace() { +        return Exchange(Dirty::FrontFace, false); +    } + +    bool TouchPrimitiveTopology() { +        return Exchange(Dirty::PrimitiveTopology, false); +    } + +    bool TouchStencilOp() { +        return Exchange(Dirty::StencilOp, false); +    } + +    bool TouchStencilTestEnable() { +        return Exchange(Dirty::StencilTestEnable, false); +    } +  private:      bool Exchange(std::size_t id, bool new_value) const noexcept {          auto& flags = system.GPU().Maxwell3D().dirty.flags; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 0d485a662..051298cc8 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -88,6 +88,16 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {      
X(vkCmdSetStencilWriteMask);      X(vkCmdSetViewport);      X(vkCmdWaitEvents); +    X(vkCmdBindVertexBuffers2EXT); +    X(vkCmdSetCullModeEXT); +    X(vkCmdSetDepthBoundsTestEnableEXT); +    X(vkCmdSetDepthCompareOpEXT); +    X(vkCmdSetDepthTestEnableEXT); +    X(vkCmdSetDepthWriteEnableEXT); +    X(vkCmdSetFrontFaceEXT); +    X(vkCmdSetPrimitiveTopologyEXT); +    X(vkCmdSetStencilOpEXT); +    X(vkCmdSetStencilTestEnableEXT);      X(vkCreateBuffer);      X(vkCreateBufferView);      X(vkCreateCommandPool); diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index d56fdb3f9..71daac9d7 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -207,6 +207,16 @@ struct DeviceDispatch : public InstanceDispatch {      PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;      PFN_vkCmdSetViewport vkCmdSetViewport;      PFN_vkCmdWaitEvents vkCmdWaitEvents; +    PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT; +    PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT; +    PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT; +    PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT; +    PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT; +    PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT; +    PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT; +    PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; +    PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; +    PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;      PFN_vkCreateBuffer vkCreateBuffer;      PFN_vkCreateBufferView vkCreateBufferView;      PFN_vkCreateCommandPool vkCreateCommandPool; @@ -969,6 +979,50 @@ public:                               buffer_barriers.data(), image_barriers.size(), image_barriers.data());      } +    void BindVertexBuffers2EXT(u32 first_binding, u32 binding_count, const VkBuffer* buffers, +                               const VkDeviceSize* 
offsets, const VkDeviceSize* sizes, +                               const VkDeviceSize* strides) const noexcept { +        dld->vkCmdBindVertexBuffers2EXT(handle, first_binding, binding_count, buffers, offsets, +                                        sizes, strides); +    } + +    void SetCullModeEXT(VkCullModeFlags cull_mode) const noexcept { +        dld->vkCmdSetCullModeEXT(handle, cull_mode); +    } + +    void SetDepthBoundsTestEnableEXT(bool enable) const noexcept { +        dld->vkCmdSetDepthBoundsTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); +    } + +    void SetDepthCompareOpEXT(VkCompareOp compare_op) const noexcept { +        dld->vkCmdSetDepthCompareOpEXT(handle, compare_op); +    } + +    void SetDepthTestEnableEXT(bool enable) const noexcept { +        dld->vkCmdSetDepthTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); +    } + +    void SetDepthWriteEnableEXT(bool enable) const noexcept { +        dld->vkCmdSetDepthWriteEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); +    } + +    void SetFrontFaceEXT(VkFrontFace front_face) const noexcept { +        dld->vkCmdSetFrontFaceEXT(handle, front_face); +    } + +    void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { +        dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); +    } + +    void SetStencilOpEXT(VkStencilFaceFlags face_mask, VkStencilOp fail_op, VkStencilOp pass_op, +                         VkStencilOp depth_fail_op, VkCompareOp compare_op) const noexcept { +        dld->vkCmdSetStencilOpEXT(handle, face_mask, fail_op, pass_op, depth_fail_op, compare_op); +    } + +    void SetStencilTestEnableEXT(bool enable) const noexcept { +        dld->vkCmdSetStencilTestEnableEXT(handle, enable ? 
VK_TRUE : VK_FALSE); +    } +      void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,                                           const VkDeviceSize* offsets,                                           const VkDeviceSize* sizes) const noexcept { | 
