diff options
| author | bunnei <bunneidev@gmail.com> | 2018-04-24 01:09:02 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-04-24 01:09:02 -0400 | 
| commit | 07dc0bbf3e10c030a32f6853de31642162ce988d (patch) | |
| tree | a6f3ae5a2a5a6567c6c8293a60352c9db83d9da2 /src/video_core | |
| parent | 0214351f4f0e9377792f8ceb657e3a47aba334d1 (diff) | |
| parent | 9531a2928369bcc094c28b77408811d47c46c7f2 (diff) | |
Merge pull request #379 from Subv/multi_buffers
GPU: Support multiple enabled vertex arrays.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 121 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | 
3 files changed, 89 insertions, 43 deletions
| diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d4fcedace..609504795 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -500,6 +500,11 @@ public:                          return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) |                                                       start_low);                      } + +                    bool IsEnabled() const { +                        return enable != 0 && StartAddress() != 0; +                    } +                  } vertex_array[NumVertexArrays];                  Blend blend; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d4a0d6db..82001e7b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {      }  } -void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { +std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, +                                                             GLintptr buffer_offset) {      MICROPROFILE_SCOPE(OpenGL_VAO);      const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;      const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; @@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {      state.draw.vertex_buffer = stream_buffer->GetHandle();      state.Apply(); -    // TODO(bunnei): Add support for 1+ vertex arrays -    const auto& vertex_array{regs.vertex_array[0]}; -    const auto& vertex_array_limit{regs.vertex_array_limit[0]}; -    ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); -    ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); -    for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { -        ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); +    // Upload all guest vertex arrays sequentially to our buffer +    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { +        const auto& vertex_array = regs.vertex_array[index]; +        if (!vertex_array.IsEnabled()) +            continue; + +        const Tegra::GPUVAddr start = vertex_array.StartAddress(); +        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + +        ASSERT(end > start); +        u64 size = end - start + 1; + +        // Copy vertex array data +        const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; +        res_cache.FlushRegion(data_addr, size, nullptr); +        Memory::ReadBlock(data_addr, array_ptr, size); + +        // Bind the vertex array to the buffer at the current offset. +        glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); + +        ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); + +        array_ptr += size; +        buffer_offset += size;      }      // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.      // Enables the first 16 vertex attributes always, as we don't know which ones are actually used -    // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now +    // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now      // to avoid OpenGL errors. +    // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't +    // assume every shader uses them all.      for (unsigned index = 0; index < 16; ++index) {          auto& attrib = regs.vertex_attrib_format[index];          NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",                      index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),                      attrib.offset.Value(), attrib.IsNormalized()); -        glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), -                              attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, -                              reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); +        auto& buffer = regs.vertex_array[attrib.buffer]; +        ASSERT(buffer.IsEnabled()); +          glEnableVertexAttribArray(index); +        glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), +                             attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); +        glVertexAttribBinding(index, attrib.buffer); +          hw_vao_enabled_attributes[index] = true;      } -    // Copy vertex array data -    const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; -    const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; -    res_cache.FlushRegion(data_addr, data_size, nullptr); -    Memory::ReadBlock(data_addr, array_ptr, data_size); - -    array_ptr += data_size; -    buffer_offset += data_size; +    return {array_ptr, buffer_offset};  } -void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {      // Helper function for uploading uniform data      const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {          if (has_ARB_direct_state_access) { @@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size      u32 current_constbuffer_bindpoint = 0;      for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { -        ptr_pos += sizeof(GLShader::MaxwellUniformData); -          auto& shader_config = gpu.regs.shader_config[index];          const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size          }          // Upload uniform data as one UBO per stage -        const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); +        const GLintptr ubo_offset = buffer_offset;          copy_buffer(uniform_buffers[stage].handle, ubo_offset,                      sizeof(GLShader::MaxwellUniformData));          GLShader::MaxwellUniformData* ub_ptr = -            reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); +            reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);          ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); +        buffer_ptr += sizeof(GLShader::MaxwellUniformData); +        buffer_offset += sizeof(GLShader::MaxwellUniformData); +          // Fetch program code from memory          GLShader::ProgramCode program_code;          const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; @@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size      shader_program_manager->UseTrivialGeometryShader();  } +size_t RasterizerOpenGL::CalculateVertexArraysSize() const { +    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + +    size_t size = 0; +    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { +        if (!regs.vertex_array[index].IsEnabled()) +            continue; + +        const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); +        const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + +        ASSERT(end > start); +        size += end - start + 1; +    } + +    return size; +} +  bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {      accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;      DrawArrays(); @@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() {      const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};      const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; -    // TODO(bunnei): Add support for 1+ vertex arrays -    vs_input_size = vertex_num * regs.vertex_array[0].stride; -      state.draw.vertex_buffer = stream_buffer->GetHandle();      state.Apply(); -    size_t buffer_size = static_cast<size_t>(vs_input_size); +    size_t buffer_size = CalculateVertexArraysSize(); +      if (is_indexed) { -        buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; +        buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;      }      // Uniform space for the 5 shader stages -    buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; +    buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + +                  sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; -    size_t ptr_pos = 0;      u8* buffer_ptr;      GLintptr buffer_offset;      std::tie(buffer_ptr, buffer_offset) =          stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); -    SetupVertexArray(buffer_ptr, buffer_offset); -    ptr_pos += vs_input_size; +    u8* offseted_buffer; +    std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); + +    offseted_buffer = +        reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); +    buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);      // If indexed mode, copy the index buffer      GLintptr index_buffer_offset = 0;      if (is_indexed) { -        ptr_pos = Common::AlignUp(ptr_pos, 4); -          const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;          const VAddr index_data_addr{              memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; -        Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); +        Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); -        index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); -        ptr_pos += index_buffer_size; +        index_buffer_offset = buffer_offset; +        offseted_buffer += index_buffer_size; +        buffer_offset += index_buffer_size;      } -    SetupShaders(buffer_ptr, buffer_offset, ptr_pos); +    offseted_buffer = +        reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); +    buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); + +    SetupShaders(offseted_buffer, buffer_offset);      stream_buffer->Unmap(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 03e02b52a..544714b95 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -148,13 +148,13 @@ private:      static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024;      std::unique_ptr<OGLStreamBuffer> stream_buffer; -    GLsizeiptr vs_input_size; +    size_t CalculateVertexArraysSize() const; -    void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); +    std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);      std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; -    void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); +    void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);      enum class AccelDraw { Disabled, Arrays, Indexed };      AccelDraw accelerate_draw; | 
