diff options
| author | Markus Wick <markus@selfnet.de> | 2018-09-05 11:36:50 +0200 | 
|---|---|---|
| committer | Markus Wick <markus@selfnet.de> | 2018-09-05 18:46:35 +0200 | 
| commit | d3ad9469a172eeaaf34ca641a6bf679b7b10eedf (patch) | |
| tree | f4883bd9d06a1f370039e96c41beea70ff1a3f2b /src/video_core | |
| parent | 527e362a839632ab2d9f7fae3147f53709469d32 (diff) | |
gl_rasterizer: Implement a VAO cache.
This patch caches VAO objects instead of re-emitting all pointers per draw call.
Configuring these pointers is known to be a fast task, but it yields too many GL
calls. So for better performance, just bind the VAO instead of 16 pointers.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 96 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 8 | 
3 files changed, 60 insertions, 53 deletions
| diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 92bfda053..f59d01738 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -127,6 +127,7 @@ public:                  BitField<21, 6, Size> size;                  BitField<27, 3, Type> type;                  BitField<31, 1, u32> bgra; +                u32 hex;              };              u32 ComponentCount() const { @@ -262,6 +263,10 @@ public:              bool IsValid() const {                  return size != Size::Invalid;              } + +            bool operator<(const VertexAttribute& other) const { +                return hex < other.hex; +            }          };          enum class PrimitiveTopology : u32 { @@ -545,7 +550,7 @@ public:                  INSERT_PADDING_WORDS(0x5B); -                VertexAttribute vertex_attrib_format[NumVertexAttributes]; +                std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;                  INSERT_PADDING_WORDS(0xF); @@ -964,7 +969,7 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);  ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);  ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);  ASSERT_REG_POSITION(zeta, 0x3F8); -ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458); +ASSERT_REG_POSITION(vertex_attrib_format, 0x458);  ASSERT_REG_POSITION(rt_control, 0x487);  ASSERT_REG_POSITION(zeta_width, 0x48a);  ASSERT_REG_POSITION(zeta_height, 0x48b); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c66a18155..5d493a2b2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,28 +70,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo      // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0      state.clip_distance[0] = true; -    // Generate VAO and UBO -    sw_vao.Create(); 
-    uniform_buffer.Create(); - -    state.draw.vertex_array = sw_vao.handle; -    state.draw.uniform_buffer = uniform_buffer.handle; -    state.Apply(); -      // Create render framebuffer      framebuffer.Create(); -    hw_vao.Create(); - -    state.draw.vertex_buffer = buffer_cache.GetHandle(); -      shader_program_manager = std::make_unique<GLShader::ProgramManager>();      state.draw.shader_program = 0; -    state.draw.vertex_array = hw_vao.handle;      state.Apply(); -    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle()); -      glEnable(GL_BLEND);      glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); @@ -106,7 +91,54 @@ void RasterizerOpenGL::SetupVertexArrays() {      const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();      const auto& regs = gpu.regs; -    state.draw.vertex_array = hw_vao.handle; +    auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); +    auto& VAO = iter->second; + +    if (is_cache_miss) { +        VAO.Create(); +        state.draw.vertex_array = VAO.handle; +        state.Apply(); + +        // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work +        // around. +        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle()); + +        // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. +        // Enables the first 16 vertex attributes always, as we don't know which ones are actually +        // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 +        // for now to avoid OpenGL errors. +        // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't +        // assume every shader uses them all. +        for (unsigned index = 0; index < 16; ++index) { +            const auto& attrib = regs.vertex_attrib_format[index]; + +            // Ignore invalid attributes. 
+            if (!attrib.IsValid()) +                continue; + +            const auto& buffer = regs.vertex_array[attrib.buffer]; +            LOG_TRACE(HW_GPU, +                      "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", +                      index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), +                      attrib.offset.Value(), attrib.IsNormalized()); + +            ASSERT(buffer.IsEnabled()); + +            glEnableVertexAttribArray(index); +            if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt || +                attrib.type == +                    Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) { +                glVertexAttribIFormat(index, attrib.ComponentCount(), +                                      MaxwellToGL::VertexType(attrib), attrib.offset); +            } else { +                glVertexAttribFormat(index, attrib.ComponentCount(), +                                     MaxwellToGL::VertexType(attrib), +                                     attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); +            } +            glVertexAttribBinding(index, attrib.buffer); +        } +    } +    state.draw.vertex_array = VAO.handle;      state.draw.vertex_buffer = buffer_cache.GetHandle();      state.Apply(); @@ -142,38 +174,6 @@ void RasterizerOpenGL::SetupVertexArrays() {              glVertexBindingDivisor(index, 0);          }      } - -    // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. -    // Enables the first 16 vertex attributes always, as we don't know which ones are actually used -    // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now -    // to avoid OpenGL errors. -    // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't -    // assume every shader uses them all. 
-    for (unsigned index = 0; index < 16; ++index) { -        auto& attrib = regs.vertex_attrib_format[index]; - -        // Ignore invalid attributes. -        if (!attrib.IsValid()) -            continue; - -        auto& buffer = regs.vertex_array[attrib.buffer]; -        LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", -                  index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), -                  attrib.offset.Value(), attrib.IsNormalized()); - -        ASSERT(buffer.IsEnabled()); - -        glEnableVertexAttribArray(index); -        if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt || -            attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) { -            glVertexAttribIFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), -                                  attrib.offset); -        } else { -            glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), -                                 attrib.IsNormalized() ? 
GL_TRUE : GL_FALSE, attrib.offset); -        } -        glVertexAttribBinding(index, attrib.buffer); -    }  }  void RasterizerOpenGL::SetupShaders() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4c4b084b8..9c30dc0e8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -6,6 +6,7 @@  #include <array>  #include <cstddef> +#include <map>  #include <memory>  #include <tuple>  #include <utility> @@ -168,14 +169,15 @@ private:      ScreenInfo& screen_info;      std::unique_ptr<GLShader::ProgramManager> shader_program_manager; -    OGLVertexArray sw_vao; -    OGLVertexArray hw_vao; +    std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute, +                        Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>, +             OGLVertexArray> +        vertex_array_cache;      std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;      static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;      OGLBufferCache buffer_cache; -    OGLBuffer uniform_buffer;      OGLFramebuffer framebuffer;      GLint uniform_buffer_alignment; | 
