diff options
| author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2018-10-02 14:47:26 -0300 | 
|---|---|---|
| committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2018-10-04 00:03:44 -0300 | 
| commit | 3e2380327a99196ac2bb18668f6f53c4248bcca1 (patch) | |
| tree | f0e330bb43f9611c5319d7d51c27c5d2fb2ebc5b | |
| parent | 393042c09c38246a134cb89c077b19a409f3566e (diff) | |
gl_rasterizer: Implement quads topology
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_primitive_assembler.cpp | 64 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_primitive_assembler.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 146 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 7 | 
8 files changed, 236 insertions, 46 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f5ae57039..09ecc5bad 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(video_core STATIC      renderer_base.h      renderer_opengl/gl_buffer_cache.cpp      renderer_opengl/gl_buffer_cache.h +    renderer_opengl/gl_primitive_assembler.cpp +    renderer_opengl/gl_primitive_assembler.h      renderer_opengl/gl_rasterizer.cpp      renderer_opengl/gl_rasterizer.h      renderer_opengl/gl_rasterizer_cache.cpp diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 9f5581045..4290da33f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -744,6 +744,12 @@ public:                          return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |                                                       end_addr_low);                      } + +                    /// Adjust the index buffer offset so it points to the first desired index. +                    GPUVAddr IndexStart() const { +                        return StartAddress() + static_cast<size_t>(first) * +                                                    static_cast<size_t>(FormatSizeInBytes()); +                    }                  } index_array;                  INSERT_PADDING_WORDS(0x7); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 578aca789..c142095c5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size      }      AlignBuffer(alignment); -    GLintptr uploaded_offset = buffer_offset; +    const GLintptr uploaded_offset = buffer_offset;      Memory::ReadBlock(*cpu_addr, buffer_ptr, size); @@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s                                            std::size_t alignment) {      AlignBuffer(alignment);      std::memcpy(buffer_ptr, raw_pointer, size); -    GLintptr uploaded_offset = buffer_offset; +    const GLintptr uploaded_offset = buffer_offset;      buffer_ptr += size;      buffer_offset += size;      return uploaded_offset;  } +std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { +    AlignBuffer(alignment); +    u8* const uploaded_ptr = buffer_ptr; +    const GLintptr uploaded_offset = buffer_offset; + +    buffer_ptr += size; +    buffer_offset += size; +    return std::make_tuple(uploaded_ptr, uploaded_offset); +} +  void OGLBufferCache::Map(std::size_t max_size) {      bool invalidate;      std::tie(buffer_ptr, buffer_offset_base, invalidate) = @@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) {          InvalidateAll();      }  } +  void OGLBufferCache::Unmap() {      stream_buffer.Unmap(buffer_offset - buffer_offset_base);  } @@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const {  void OGLBufferCache::AlignBuffer(std::size_t alignment) {      // Align the offset, not the mapped pointer -    GLintptr offset_aligned = +    const GLintptr offset_aligned =          static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));      buffer_ptr += offset_aligned - buffer_offset;      buffer_offset = offset_aligned; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 6c18461f4..965976334 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -6,6 +6,7 @@  #include <cstddef>  #include <memory> +#include <tuple>  #include "common/common_types.h"  #include "video_core/rasterizer_cache.h" @@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer  public:      explicit OGLBufferCache(std::size_t size); +    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been +    /// allocated.      GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,                            bool cache = true); +    /// Uploads from a host memory. Returns host's buffer offset where it's been allocated.      GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); +    /// Reserves memory to be used by host's CPU. Returns mapped address and offset. +    std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); +      void Map(std::size_t max_size);      void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp new file mode 100644 index 000000000..ee1d9601b --- /dev/null +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -0,0 +1,64 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_primitive_assembler.h" + +namespace OpenGL { + +constexpr u32 TRIANGLES_PER_QUAD = 6; +constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; + +PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} + +PrimitiveAssembler::~PrimitiveAssembler() = default; + +std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { +    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); +    return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint); +} + +GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { +    const std::size_t size{CalculateQuadSize(count)}; +    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size); + +    for (u32 primitive = 0; primitive < count / 4; ++primitive) { +        for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) { +            const u32 index = first + primitive * 4 + QUAD_MAP[i]; +            std::memcpy(dst_pointer, &index, sizeof(index)); +            dst_pointer += sizeof(index); +        } +    } + +    return index_offset; +} + +GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, +                                             u32 count) { +    const std::size_t map_size{CalculateQuadSize(count)}; +    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); + +    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); +    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; +    const u8* source{Memory::GetPointer(*cpu_addr)}; + +    for (u32 primitive = 0; primitive < count / 4; ++primitive) { +        for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { +            const u32 index = primitive * 4 + QUAD_MAP[i]; +            const u8* src_offset = source + (index * index_size); + +            std::memcpy(dst_pointer, src_offset, index_size); +            dst_pointer += index_size; +        } +    } + +    return index_offset; +} + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h new file mode 100644 index 000000000..a8cb88eb5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -0,0 +1,33 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> +#include <glad/glad.h> + +#include "common/common_types.h" +#include "video_core/memory_manager.h" + +namespace OpenGL { + +class OGLBufferCache; + +class PrimitiveAssembler { +public: +    explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); +    ~PrimitiveAssembler(); + +    /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. +    std::size_t CalculateQuadSize(u32 count) const; + +    GLintptr MakeQuadArray(u32 first, u32 count); + +    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); + +private: +    OGLBufferCache& buffer_cache; +}; + +} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 587d9dffb..60dcdc184 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12  MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));  MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));  MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); +MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); + +struct DrawParameters { +    GLenum primitive_mode; +    GLsizei count; +    GLint current_instance; +    bool use_indexed; + +    GLint vertex_first; + +    GLenum index_format; +    GLint base_vertex; +    GLintptr index_buffer_offset; + +    void DispatchDraw() const { +        if (use_indexed) { +            const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); +            if (current_instance > 0) { +                glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, +                                                              index_buffer_ptr, 1, base_vertex, +                                                              current_instance); +            } else { +                glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, +                                         base_vertex); +            } +        } else { +            if (current_instance > 0) { +                glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1, +                                                  current_instance); +            } else { +                glDrawArrays(primitive_mode, vertex_first, count); +            } +        } +    } +};  RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)      : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { @@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() {      }  } +DrawParameters RasterizerOpenGL::SetupDraw() { +    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); +    const auto& regs = gpu.regs; +    const bool is_indexed = accelerate_draw == AccelDraw::Indexed; + +    DrawParameters params{}; +    params.current_instance = gpu.state.current_instance; + +    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { +        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); + +        params.use_indexed = true; +        params.primitive_mode = GL_TRIANGLES; + +        if (is_indexed) { +            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); +            params.count = (regs.index_array.count / 4) * 6; +            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( +                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), +                regs.index_array.count); +            params.base_vertex = static_cast<GLint>(regs.vb_element_base); +        } else { +            // MakeQuadArray always generates u32 indexes +            params.index_format = GL_UNSIGNED_INT; +            params.count = (regs.vertex_buffer.count / 4) * 6; +            params.index_buffer_offset = +                primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count); +        } +        return params; +    } + +    params.use_indexed = is_indexed; +    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); + +    if (is_indexed) { +        MICROPROFILE_SCOPE(OpenGL_Index); +        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); +        params.count = regs.index_array.count; +        params.index_buffer_offset = +            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); +        params.base_vertex = static_cast<GLint>(regs.vb_element_base); +    } else { +        params.count = regs.vertex_buffer.count; +        params.vertex_first = regs.vertex_buffer.first; +    } +} +  void RasterizerOpenGL::SetupShaders() {      MICROPROFILE_SCOPE(OpenGL_Shader);      const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); @@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {      return size;  } +std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { +    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + +    return static_cast<std::size_t>(regs.index_array.count) * +           static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); +} +  bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {      accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;      DrawArrays(); @@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() {      // Draw the vertex batch      const bool is_indexed = accelerate_draw == AccelDraw::Indexed; -    const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) * -                                static_cast<u64>(regs.index_array.FormatSizeInBytes())};      state.draw.vertex_buffer = buffer_cache.GetHandle();      state.Apply();      std::size_t buffer_size = CalculateVertexArraysSize(); -    if (is_indexed) { -        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; +    // Add space for index buffer (keeping in mind non-core primitives) +    switch (regs.draw.topology) { +    case Maxwell::PrimitiveTopology::Quads: +        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + +                      primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); +        break; +    default: +        if (is_indexed) { +            buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize(); +        } +        break;      }      // Uniform space for the 5 shader stages @@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() {      buffer_cache.Map(buffer_size);      SetupVertexArrays(); - -    // If indexed mode, copy the index buffer -    GLintptr index_buffer_offset = 0; -    if (is_indexed) { -        MICROPROFILE_SCOPE(OpenGL_Index); - -        // Adjust the index buffer offset so it points to the first desired index. -        auto index_start = regs.index_array.StartAddress(); -        index_start += static_cast<size_t>(regs.index_array.first) * -                       static_cast<size_t>(regs.index_array.FormatSizeInBytes()); - -        index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size); -    } - +    DrawParameters params = SetupDraw();      SetupShaders();      buffer_cache.Unmap(); @@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() {      shader_program_manager->ApplyTo(state);      state.Apply(); -    const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; -    if (is_indexed) { -        const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)}; - -        if (gpu.state.current_instance > 0) { -            glDrawElementsInstancedBaseVertexBaseInstance( -                primitive_mode, regs.index_array.count, -                MaxwellToGL::IndexFormat(regs.index_array.format), -                reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex, -                gpu.state.current_instance); -        } else { -            glDrawElementsBaseVertex(primitive_mode, regs.index_array.count, -                                     MaxwellToGL::IndexFormat(regs.index_array.format), -                                     reinterpret_cast<const void*>(index_buffer_offset), -                                     base_vertex); -        } -    } else { -        if (gpu.state.current_instance > 0) { -            glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first, -                                              regs.vertex_buffer.count, 1, -                                              gpu.state.current_instance); -        } else { -            glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count); -        } -    } +    // Execute draw call +    params.DispatchDraw();      // Disable scissor test      state.scissor.enabled = false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4c8ecbd1c..bf954bb5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@  #include "video_core/rasterizer_cache.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_primitive_assembler.h"  #include "video_core/renderer_opengl/gl_rasterizer_cache.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -38,6 +39,7 @@ class EmuWindow;  namespace OpenGL {  struct ScreenInfo; +struct DrawParameters;  class RasterizerOpenGL : public VideoCore::RasterizerInterface {  public: @@ -192,12 +194,17 @@ private:      static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;      OGLBufferCache buffer_cache;      OGLFramebuffer framebuffer; +    PrimitiveAssembler primitive_assembler{buffer_cache};      GLint uniform_buffer_alignment;      std::size_t CalculateVertexArraysSize() const; +    std::size_t CalculateIndexBufferSize() const; +      void SetupVertexArrays(); +    DrawParameters SetupDraw(); +      void SetupShaders();      enum class AccelDraw { Disabled, Arrays, Indexed };  | 
