diff options
25 files changed, 502 insertions, 125 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f617665de..b474eb363 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,8 @@ add_library(video_core STATIC      renderer_null/null_rasterizer.h      renderer_null/renderer_null.cpp      renderer_null/renderer_null.h +    renderer_opengl/blit_image.cpp +    renderer_opengl/blit_image.h      renderer_opengl/gl_buffer_cache.cpp      renderer_opengl/gl_buffer_cache.h      renderer_opengl/gl_compute_pipeline.cpp diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp index 2437121ce..1d22d25f1 100644 --- a/src/video_core/engines/draw_manager.cpp +++ b/src/video_core/engines/draw_manager.cpp @@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {          LOG_WARNING(HW_GPU, "(STUBBED) called");          break;      } +    case MAXWELL3D_REG_INDEX(draw_texture.src_y0): { +        DrawTexture(); +        break; +    }      default:          break;      } @@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) {      ProcessDraw(true, 1);  } +void DrawManager::DrawTexture() { +    const auto& regs{maxwell3d->regs}; +    draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; +    draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f; +    const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f; +    const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f; +    const bool lower_left{regs.window_origin.mode != +                          Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; +    if (lower_left) { +        draw_texture_state.dst_y0 -= dst_height; +    } +    draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width; +    draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height; +    draw_texture_state.src_x0 = 
static_cast<float>(regs.draw_texture.src_x0) / 4096.f; +    draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f; +    draw_texture_state.src_x1 = +        (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + +        draw_texture_state.src_x0; +    draw_texture_state.src_y1 = +        (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + +        draw_texture_state.src_y0; +    draw_texture_state.src_sampler = regs.draw_texture.src_sampler; +    draw_texture_state.src_texture = regs.draw_texture.src_texture; + +    maxwell3d->rasterizer->DrawTexture(); +} +  void DrawManager::UpdateTopology() {      const auto& regs{maxwell3d->regs};      switch (regs.primitive_topology_control) { diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 58d1b2d59..7c22c49f1 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h @@ -32,6 +32,19 @@ public:          std::vector<u8> inline_index_draw_indexes;      }; +    struct DrawTextureState { +        f32 dst_x0; +        f32 dst_y0; +        f32 dst_x1; +        f32 dst_y1; +        f32 src_x0; +        f32 src_y0; +        f32 src_x1; +        f32 src_y1; +        u32 src_sampler; +        u32 src_texture; +    }; +      struct IndirectParams {          bool is_indexed;          bool include_count; @@ -64,6 +77,10 @@ public:          return draw_state;      } +    const DrawTextureState& GetDrawTextureState() const { +        return draw_texture_state; +    } +      IndirectParams& GetIndirectParams() {          return indirect_state;      } @@ -81,6 +98,8 @@ private:      void DrawIndexSmall(u32 argument); +    void DrawTexture(); +      void UpdateTopology();      void ProcessDraw(bool draw_indexed, u32 instance_count); @@ -89,6 +108,7 @@ private:      Maxwell3D* maxwell3d{};      State draw_state{}; +    DrawTextureState draw_texture_state{};      IndirectParams 
indirect_state{};  };  } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 68ceda519..ae9da6290 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {      case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):      case MAXWELL3D_REG_INDEX(vertex_array_instance_first):      case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): +    case MAXWELL3D_REG_INDEX(draw_texture.src_y0):      case MAXWELL3D_REG_INDEX(wait_for_idle):      case MAXWELL3D_REG_INDEX(shadow_ram_control):      case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0b2fd2928..c89969bb4 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1599,6 +1599,20 @@ public:          };          static_assert(sizeof(TIRModulationCoeff) == 0x4); +        struct DrawTexture { +            s32 dst_x0; +            s32 dst_y0; +            s32 dst_width; +            s32 dst_height; +            s64 dx_du; +            s64 dy_dv; +            u32 src_sampler; +            u32 src_texture; +            s32 src_x0; +            s32 src_y0; +        }; +        static_assert(sizeof(DrawTexture) == 0x30); +          struct ReduceColorThreshold {              union {                  BitField<0, 8, u32> all_hit_once; @@ -2751,7 +2765,7 @@ public:                  u32 reserved_sw_method2;                                               ///< 0x102C                  std::array<TIRModulationCoeff, 5> tir_modulation_coeff;                ///< 0x1030                  std::array<u32, 15> spare_nop;                                         ///< 0x1044 -                INSERT_PADDING_BYTES_NOINIT(0x30); +                DrawTexture draw_texture;                                              ///< 0x1080                 
 std::array<u32, 7> reserved_sw_method3_to_7;                           ///< 0x10B0                  ReduceColorThreshold reduce_color_thresholds_unorm8;                   ///< 0x10CC                  std::array<u32, 4> reserved_sw_method10_to_13;                         ///< 0x10D0 diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index f275b2aa9..e968ae220 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(GLSL_INCLUDES  set(SHADER_FILES      astc_decoder.comp +    blit_color_float.frag      block_linear_unswizzle_2d.comp      block_linear_unswizzle_3d.comp      convert_abgr8_to_d24s8.frag @@ -36,7 +37,6 @@ set(SHADER_FILES      smaa_blending_weight_calculation.frag      smaa_neighborhood_blending.vert      smaa_neighborhood_blending.frag -    vulkan_blit_color_float.frag      vulkan_blit_depth_stencil.frag      vulkan_fidelityfx_fsr_easu_fp16.comp      vulkan_fidelityfx_fsr_easu_fp32.comp diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag index c0c832296..c0c832296 100644 --- a/src/video_core/host_shaders/vulkan_blit_color_float.frag +++ b/src/video_core/host_shaders/blit_color_float.frag diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert index 2c976b19f..d16d98995 100644 --- a/src/video_core/host_shaders/full_screen_triangle.vert +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -4,13 +4,20 @@  #version 450  #ifdef VULKAN +#define VERTEX_ID gl_VertexIndex  #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {  #define END_PUSH_CONSTANTS };  #define UNIFORM(n) +#define FLIPY 1  #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define VERTEX_ID gl_VertexID  #define BEGIN_PUSH_CONSTANTS  #define END_PUSH_CONSTANTS +#define FLIPY -1  #define UNIFORM(n) layout (location = 
n) uniform +out gl_PerVertex { +    vec4 gl_Position; +};  #endif  BEGIN_PUSH_CONSTANTS @@ -21,8 +28,8 @@ END_PUSH_CONSTANTS  layout(location = 0) out vec2 texcoord;  void main() { -    float x = float((gl_VertexIndex & 1) << 2); -    float y = float((gl_VertexIndex & 2) << 1); -    gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); +    float x = float((VERTEX_ID & 1) << 2); +    float y = float((VERTEX_ID & 2) << 1); +    gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0);      texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);  } diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1735b6164..33e2610bc 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public:      /// Dispatches an indirect draw invocation      virtual void DrawIndirect() {} +    /// Dispatches a draw texture invocation +    virtual void DrawTexture() = 0; +      /// Clear the current framebuffer      virtual void Clear(u32 layer_count) = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 2c11345d7..2b5c7defa 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp  RasterizerNull::~RasterizerNull() = default;  void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} +void RasterizerNull::DrawTexture() {}  void RasterizerNull::Clear(u32 layer_count) {}  void RasterizerNull::DispatchCompute() {}  void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 2112aa70e..51f896e43 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -31,6 +31,7 @@ public:      
~RasterizerNull() override;      void Draw(bool is_indexed, u32 instance_count) override; +    void DrawTexture() override;      void Clear(u32 layer_count) override;      void DispatchCompute() override;      void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp new file mode 100644 index 000000000..9a560a73b --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <algorithm> + +#include "video_core/host_shaders/blit_color_float_frag.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +namespace OpenGL { + +BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) +    : program_manager(program_manager_), +      full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), +      blit_color_to_color_frag( +          CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} + +BlitImageHelper::~BlitImageHelper() = default; + +void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, +                                const Region2D& dst_region, const Region2D& src_region, +                                const Extent3D& src_size) { +    glEnable(GL_CULL_FACE); +    glDisable(GL_COLOR_LOGIC_OP); +    glDisable(GL_DEPTH_TEST); +    glDisable(GL_STENCIL_TEST); +    glDisable(GL_POLYGON_OFFSET_FILL); +    glDisable(GL_RASTERIZER_DISCARD); +    glDisable(GL_ALPHA_TEST); +    glDisablei(GL_BLEND, 0); +    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); +    glCullFace(GL_BACK); +    glFrontFace(GL_CW); +    glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, 
GL_TRUE); +    glDepthRangeIndexed(0, 0.0, 0.0); + +    program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle); +    glProgramUniform2f(full_screen_vert.handle, 0, +                       static_cast<float>(src_region.end.x - src_region.start.x) / +                           static_cast<float>(src_size.width), +                       static_cast<float>(src_region.end.y - src_region.start.y) / +                           static_cast<float>(src_size.height)); +    glProgramUniform2f(full_screen_vert.handle, 1, +                       static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), +                       static_cast<float>(src_region.start.y) / +                           static_cast<float>(src_size.height)); +    glViewport(std::min(dst_region.start.x, dst_region.end.x), +               std::min(dst_region.start.y, dst_region.end.y), +               std::abs(dst_region.end.x - dst_region.start.x), +               std::abs(dst_region.end.y - dst_region.start.y)); +    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer); +    glBindSampler(0, src_sampler); +    glBindTextureUnit(0, src_image_view); +    glClear(GL_COLOR_BUFFER_BIT); +    glDrawArrays(GL_TRIANGLES, 0, 3); +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h new file mode 100644 index 000000000..5a2b12d16 --- /dev/null +++ b/src/video_core/renderer_opengl/blit_image.h @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <glad/glad.h> + +#include "video_core/engines/fermi_2d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/texture_cache/types.h" + +namespace OpenGL { + +using VideoCommon::Extent3D; +using VideoCommon::Offset2D; +using VideoCommon::Region2D; + +class ProgramManager; +class Framebuffer; +class ImageView; + 
+class BlitImageHelper { +public: +    explicit BlitImageHelper(ProgramManager& program_manager); +    ~BlitImageHelper(); + +    void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, +                   const Region2D& dst_region, const Region2D& src_region, +                   const Extent3D& src_size); + +private: +    ProgramManager& program_manager; + +    OGLProgram full_screen_vert; +    OGLProgram blit_color_to_color_frag; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index cee5c3247..22ed16ebf 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {      has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");      has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;      has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; +    has_draw_texture = GLAD_GL_NV_draw_texture;      warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;      need_fastmath_off = is_nvidia;      can_report_memory = GLAD_GL_NVX_gpu_memory_info; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2a72d84be..3ff8cad83 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -4,6 +4,8 @@  #pragma once  #include <cstddef> +#include <string> +  #include "common/common_types.h"  #include "core/frontend/emu_window.h"  #include "shader_recompiler/stage.h" @@ -146,6 +148,10 @@ public:          return has_sparse_texture_2;      } +    bool HasDrawTexture() const { +        return has_draw_texture; +    } +      bool IsWarpSizePotentiallyLargerThanGuest() const {          return warp_size_potentially_larger_than_guest;      } @@ -216,6 +222,7 @@ private:      bool has_shader_int64{};      bool 
has_amd_shader_half_float{};      bool has_sparse_texture_2{}; +    bool has_draw_texture{};      bool warp_size_potentially_larger_than_guest{};      bool need_fastmath_off{};      bool has_cbuf_ftou_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 181857d9c..7bced675c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra        shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,                     state_tracker, gpu.ShaderNotify()),        query_cache(*this), accelerate_dma(buffer_cache), -      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} +      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), +      blit_image(program_manager_) {}  RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -320,6 +321,47 @@ void RasterizerOpenGL::DrawIndirect() {      buffer_cache.SetDrawIndirect(nullptr);  } +void RasterizerOpenGL::DrawTexture() { +    MICROPROFILE_SCOPE(OpenGL_Drawing); + +    SCOPE_EXIT({ gpu.TickWork(); }); +    query_cache.UpdateCounters(); + +    texture_cache.SynchronizeGraphicsDescriptors(); +    texture_cache.UpdateRenderTargets(false); + +    SyncState(); + +    const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); +    const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); +    const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); + +    if (device.HasDrawTexture()) { +        state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + +        glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0, +                        draw_texture_state.dst_y0, draw_texture_state.dst_x1, +                        
draw_texture_state.dst_y1, 0, +                        draw_texture_state.src_x0 / static_cast<float>(texture.size.width), +                        draw_texture_state.src_y0 / static_cast<float>(texture.size.height), +                        draw_texture_state.src_x1 / static_cast<float>(texture.size.width), +                        draw_texture_state.src_y1 / static_cast<float>(texture.size.height)); +    } else { +        Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), +                                        .y = static_cast<s32>(draw_texture_state.dst_y0)}, +                               Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), +                                        .y = static_cast<s32>(draw_texture_state.dst_y1)}}; +        Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), +                                        .y = static_cast<s32>(draw_texture_state.src_y0)}, +                               Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), +                                        .y = static_cast<s32>(draw_texture_state.src_y1)}}; +        blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(), +                             sampler->Handle(), dst_region, src_region, texture.size); +    } + +    ++num_queued_commands; +} +  void RasterizerOpenGL::DispatchCompute() {      gpu_memory->FlushCaching();      ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be4f76c18..0c45832ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@  #include "video_core/engines/maxwell_dma.h"  #include "video_core/rasterizer_accelerated.h"  #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/blit_image.h"  
#include "video_core/renderer_opengl/gl_buffer_cache.h"  #include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -70,6 +71,7 @@ public:      void Draw(bool is_indexed, u32 instance_count) override;      void DrawIndirect() override; +    void DrawTexture() override;      void Clear(u32 layer_count) override;      void DispatchCompute() override;      void ResetCounter(VideoCore::QueryType type) override; @@ -224,6 +226,8 @@ private:      AccelerateDMA accelerate_dma;      FenceManagerOpenGL fence_manager; +    BlitImageHelper blit_image; +      boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;      std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;      boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index d9c29d8b7..98841ae65 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,2 +1,123 @@  // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project  // SPDX-License-Identifier: GPL-2.0-or-later + +#include <glad/glad.h> + +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ +    GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, +    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +}; + +ProgramManager::ProgramManager(const Device& device) { +    glCreateProgramPipelines(1, &pipeline.handle); +    if (device.UseAssemblyShaders()) { +        glEnable(GL_COMPUTE_PROGRAM_NV); +    } +} + +void ProgramManager::BindComputeProgram(GLuint program) { +    glUseProgram(program); +    is_compute_bound = true; +} + +void ProgramManager::BindComputeAssemblyProgram(GLuint program) { +    if (current_assembly_compute_program != program) { +        
current_assembly_compute_program = program; +        glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); +    } +    UnbindPipeline(); +} + +void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { +    static constexpr std::array<GLenum, 5> stage_enums{ +        GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, +        GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, +    }; +    for (size_t stage = 0; stage < NUM_STAGES; ++stage) { +        if (current_programs[stage] != programs[stage].handle) { +            current_programs[stage] = programs[stage].handle; +            glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); +        } +    } +    BindPipeline(); +} + +void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { +    if (current_programs[0] != vertex) { +        current_programs[0] = vertex; +        glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); +    } +    if (current_programs[4] != fragment) { +        current_programs[4] = fragment; +        glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); +    } +    glUseProgramStages( +        pipeline.handle, +        GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); +    current_programs[1] = 0; +    current_programs[2] = 0; +    current_programs[3] = 0; + +    if (current_stage_mask != 0) { +        current_stage_mask = 0; +        for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { +            glDisable(program_type); +        } +    } +    BindPipeline(); +} + +void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, +                                          u32 stage_mask) { +    const u32 changed_mask = current_stage_mask ^ stage_mask; +    current_stage_mask = stage_mask; + +    if (changed_mask != 0) { +        for (size_t stage = 0; stage < NUM_STAGES; ++stage) 
{ +            if (((changed_mask >> stage) & 1) != 0) { +                if (((stage_mask >> stage) & 1) != 0) { +                    glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); +                } else { +                    glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); +                } +            } +        } +    } +    for (size_t stage = 0; stage < NUM_STAGES; ++stage) { +        if (current_programs[stage] != programs[stage].handle) { +            current_programs[stage] = programs[stage].handle; +            glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); +        } +    } +    UnbindPipeline(); +} + +void ProgramManager::RestoreGuestCompute() {} + +void ProgramManager::BindPipeline() { +    if (!is_pipeline_bound) { +        is_pipeline_bound = true; +        glBindProgramPipeline(pipeline.handle); +    } +    UnbindCompute(); +} + +void ProgramManager::UnbindPipeline() { +    if (is_pipeline_bound) { +        is_pipeline_bound = false; +        glBindProgramPipeline(0); +    } +    UnbindCompute(); +} + +void ProgramManager::UnbindCompute() { +    if (is_compute_bound) { +        is_compute_bound = false; +        glUseProgram(0); +    } +} +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index a84f5aeb3..07ffab77f 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -6,8 +6,6 @@  #include <array>  #include <span> -#include <glad/glad.h> -  #include "video_core/renderer_opengl/gl_device.h"  #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -16,121 +14,28 @@ namespace OpenGL {  class ProgramManager {      static constexpr size_t NUM_STAGES = 5; -    static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ -        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, -        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, -    }; -  
public: -    explicit ProgramManager(const Device& device) { -        glCreateProgramPipelines(1, &pipeline.handle); -        if (device.UseAssemblyShaders()) { -            glEnable(GL_COMPUTE_PROGRAM_NV); -        } -    } - -    void BindComputeProgram(GLuint program) { -        glUseProgram(program); -        is_compute_bound = true; -    } - -    void BindComputeAssemblyProgram(GLuint program) { -        if (current_assembly_compute_program != program) { -            current_assembly_compute_program = program; -            glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); -        } -        UnbindPipeline(); -    } - -    void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { -        static constexpr std::array<GLenum, 5> stage_enums{ -            GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, -            GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, -        }; -        for (size_t stage = 0; stage < NUM_STAGES; ++stage) { -            if (current_programs[stage] != programs[stage].handle) { -                current_programs[stage] = programs[stage].handle; -                glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); -            } -        } -        BindPipeline(); -    } - -    void BindPresentPrograms(GLuint vertex, GLuint fragment) { -        if (current_programs[0] != vertex) { -            current_programs[0] = vertex; -            glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); -        } -        if (current_programs[4] != fragment) { -            current_programs[4] = fragment; -            glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); -        } -        glUseProgramStages( -            pipeline.handle, -            GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); -        current_programs[1] = 0; -        current_programs[2] = 0; -        current_programs[3] = 0; - -  
      if (current_stage_mask != 0) { -            current_stage_mask = 0; -            for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { -                glDisable(program_type); -            } -        } -        BindPipeline(); -    } +    explicit ProgramManager(const Device& device); + +    void BindComputeProgram(GLuint program); + +    void BindComputeAssemblyProgram(GLuint program); + +    void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs); + +    void BindPresentPrograms(GLuint vertex, GLuint fragment);      void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs, -                              u32 stage_mask) { -        const u32 changed_mask = current_stage_mask ^ stage_mask; -        current_stage_mask = stage_mask; - -        if (changed_mask != 0) { -            for (size_t stage = 0; stage < NUM_STAGES; ++stage) { -                if (((changed_mask >> stage) & 1) != 0) { -                    if (((stage_mask >> stage) & 1) != 0) { -                        glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); -                    } else { -                        glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); -                    } -                } -            } -        } -        for (size_t stage = 0; stage < NUM_STAGES; ++stage) { -            if (current_programs[stage] != programs[stage].handle) { -                current_programs[stage] = programs[stage].handle; -                glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); -            } -        } -        UnbindPipeline(); -    } - -    void RestoreGuestCompute() {} +                              u32 stage_mask); + +    void RestoreGuestCompute();  private: -    void BindPipeline() { -        if (!is_pipeline_bound) { -            is_pipeline_bound = true; -            glBindProgramPipeline(pipeline.handle); -        } -        UnbindCompute(); -    } - -    void UnbindPipeline() { -        if (is_pipeline_bound) { -      
      is_pipeline_bound = false; -            glBindProgramPipeline(0); -        } -        UnbindCompute(); -    } - -    void UnbindCompute() { -        if (is_compute_bound) { -            is_compute_bound = false; -            glUseProgram(0); -        } -    } +    void BindPipeline(); + +    void UnbindPipeline(); + +    void UnbindCompute();      OGLPipeline pipeline;      bool is_pipeline_bound{}; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 3f2b139e0..dd00d3edf 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,13 +4,13 @@  #include <algorithm>  #include "common/settings.h" +#include "video_core/host_shaders/blit_color_float_frag_spv.h"  #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"  #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"  #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"  #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"  #include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"  #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" -#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"  #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"  #include "video_core/renderer_vulkan/blit_image.h"  #include "video_core/renderer_vulkan/maxwell_to_vk.h" @@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri  }  void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region, -                   const Region2D& src_region) { +                   const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {      const VkOffset2D offset{          .x = std::min(dst_region.start.x, dst_region.end.x),          .y = std::min(dst_region.start.y, dst_region.end.y), @@ -325,12 +325,15 @@ void 
BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi          .offset = offset,          .extent = extent,      }; -    const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x); -    const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y); +    const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) / +                          static_cast<float>(src_size.width); +    const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) / +                          static_cast<float>(src_size.height);      const PushConstants push_constants{          .tex_scale = {scale_x, scale_y}, -        .tex_offset = {static_cast<float>(src_region.start.x), -                       static_cast<float>(src_region.start.y)}, +        .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width), +                       static_cast<float>(src_region.start.y) / +                           static_cast<float>(src_size.height)},      };      cmdbuf.SetViewport(0, viewport);      cmdbuf.SetScissor(0, scissor); @@ -347,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) {          .height = is_rescaled ? 
resolution.ScaleUp(height) : height,      };  } + +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, +                           VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { +    constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | +                            VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; +    const VkImageMemoryBarrier barrier{ +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, +        .pNext = nullptr, +        .srcAccessMask = flags, +        .dstAccessMask = flags, +        .oldLayout = source_layout, +        .newLayout = target_layout, +        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, +        .image = image, +        .subresourceRange{ +            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, +            .baseMipLevel = 0, +            .levelCount = 1, +            .baseArrayLayer = 0, +            .layerCount = 1, +        }, +    }; +    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, +                           0, barrier); +} + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { +    const VkRenderPass render_pass = framebuffer->RenderPass(); +    const VkFramebuffer framebuffer_handle = framebuffer->Handle(); +    const VkExtent2D render_area = framebuffer->RenderArea(); +    const VkRenderPassBeginInfo renderpass_bi{ +        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, +        .pNext = nullptr, +        .renderPass = render_pass, +        .framebuffer = framebuffer_handle, +        .renderArea{ +            .offset{}, +            .extent = render_area, +        }, +        .clearValueCount = 0, +        .pClearValues = nullptr, +    }; +    cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); +}  } // Anonymous namespace  BlitImageHelper::BlitImageHelper(const Device& device_, 
Scheduler& scheduler_, @@ -365,7 +413,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,        two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(            PipelineLayoutCreateInfo(two_textures_set_layout.address()))),        full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)), -      blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), +      blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),        blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),        convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),        convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), @@ -404,6 +452,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView      scheduler.InvalidateState();  } +void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, +                                VkImage src_image, VkSampler src_sampler, +                                const Region2D& dst_region, const Region2D& src_region, +                                const Extent3D& src_size) { +    const BlitImagePipelineKey key{ +        .renderpass = dst_framebuffer->RenderPass(), +        .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy, +    }; +    const VkPipelineLayout layout = *one_texture_pipeline_layout; +    const VkPipeline pipeline = FindOrEmplaceColorPipeline(key); +    scheduler.RequestOutsideRenderPassOperationContext(); +    scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region, +                      src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { +        TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); +        BeginRenderPass(cmdbuf, dst_framebuffer); +        const VkDescriptorSet descriptor_set = 
one_texture_descriptor_allocator.Commit(); +        UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view); +        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); +        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, +                                  nullptr); +        BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); +        cmdbuf.Draw(3, 1, 0, 0); +        cmdbuf.EndRenderPass(); +    }); +} +  void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,                                         VkImageView src_depth_view, VkImageView src_stencil_view,                                         const Region2D& dst_region, const Region2D& src_region, diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 5df679fb4..be8a9a2f6 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -10,6 +10,8 @@  namespace Vulkan { +using VideoCommon::Extent3D; +using VideoCommon::Offset2D;  using VideoCommon::Region2D;  class Device; @@ -36,6 +38,10 @@ public:                     Tegra::Engines::Fermi2D::Filter filter,                     Tegra::Engines::Fermi2D::Operation operation); +    void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view, +                   VkImage src_image, VkSampler src_sampler, const Region2D& dst_region, +                   const Region2D& src_region, const Extent3D& src_size); +      void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,                            VkImageView src_stencil_view, const Region2D& dst_region,                            const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b75b8eec6..86ef0daeb 100644 --- 
a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -266,6 +266,35 @@ void RasterizerVulkan::DrawIndirect() {      buffer_cache.SetDrawIndirect(nullptr);  } +void RasterizerVulkan::DrawTexture() { +    MICROPROFILE_SCOPE(Vulkan_Drawing); + +    SCOPE_EXIT({ gpu.TickWork(); }); +    FlushWork(); + +    query_cache.UpdateCounters(); + +    texture_cache.SynchronizeGraphicsDescriptors(); +    texture_cache.UpdateRenderTargets(false); + +    UpdateDynamicStates(); + +    const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); +    const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); +    const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); +    Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), +                                    .y = static_cast<s32>(draw_texture_state.dst_y0)}, +                           Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1), +                                    .y = static_cast<s32>(draw_texture_state.dst_y1)}}; +    Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0), +                                    .y = static_cast<s32>(draw_texture_state.src_y0)}, +                           Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1), +                                    .y = static_cast<s32>(draw_texture_state.src_y1)}}; +    blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(), +                         texture.ImageHandle(), sampler->Handle(), dst_region, src_region, +                         texture.size); +} +  void RasterizerVulkan::Clear(u32 layer_count) {      MICROPROFILE_SCOPE(Vulkan_Clearing); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 472cc64d9..a0508b57c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h 
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -66,6 +66,7 @@ public:      void Draw(bool is_indexed, u32 instance_count) override;      void DrawIndirect() override; +    void DrawTexture() override;      void Clear(u32 layer_count) override;      void DispatchCompute() override;      void ResetCounter(VideoCore::QueryType type) override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87152c8e9..1b01990a4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {  }  template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { +    const auto image_view_id = VisitImageView(channel_state->graphics_image_table, +                                              channel_state->graphics_image_view_ids, index); +    return slot_image_views[image_view_id]; +} + +template <class P>  void TextureCache<P>::MarkModification(ImageId id) noexcept {      MarkModification(slot_images[id]);  } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4eea1f609..485eaabaa 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -129,6 +129,9 @@ public:      /// Return a reference to the given image view id      [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; +    /// Get the imageview from the graphics descriptor table in the specified index +    [[nodiscard]] ImageView& GetImageView(u32 index) noexcept; +      /// Mark an image as modified from the GPU      void MarkModification(ImageId id) noexcept; | 
