diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 36 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 201 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.h | 42 | 
4 files changed, 98 insertions, 184 deletions
| diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 38a7b1413..94e3f59a7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,7 +36,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));  MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));  MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) +    : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {      // Create sampler objects      for (size_t i = 0; i < texture_samplers.size(); ++i) {          texture_samplers[i].Create(); @@ -57,9 +58,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind          const std::string_view extension{              reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; -        if (extension == "GL_ARB_buffer_storage") { -            has_ARB_buffer_storage = true; -        } else if (extension == "GL_ARB_direct_state_access") { +        if (extension == "GL_ARB_direct_state_access") {              has_ARB_direct_state_access = true;          } else if (extension == "GL_ARB_separate_shader_objects") {              has_ARB_separate_shader_objects = true; @@ -86,16 +85,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind      hw_vao.Create(); -    stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); -    stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); -    state.draw.vertex_buffer = stream_buffer->GetHandle(); +    state.draw.vertex_buffer = stream_buffer.GetHandle();      shader_program_manager = std::make_unique<GLShader::ProgramManager>();      state.draw.shader_program = 0;      state.draw.vertex_array = hw_vao.handle;      state.Apply(); -    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); +    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());      for (unsigned index = 0; index < uniform_buffers.size(); ++index) {          auto& buffer = uniform_buffers[index]; @@ -111,13 +108,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind      LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");  } -RasterizerOpenGL::~RasterizerOpenGL() { -    if (stream_buffer != nullptr) { -        state.draw.vertex_buffer = stream_buffer->GetHandle(); -        state.Apply(); -        stream_buffer->Release(); -    } -} +RasterizerOpenGL::~RasterizerOpenGL() {}  std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,                                                               GLintptr buffer_offset) { @@ -126,7 +117,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,      const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;      state.draw.vertex_array = hw_vao.handle; -    state.draw.vertex_buffer = stream_buffer->GetHandle(); +    state.draw.vertex_buffer = stream_buffer.GetHandle();      state.Apply();      // Upload all guest vertex arrays sequentially to our buffer @@ -145,7 +136,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,          Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);          // Bind the vertex array to the buffer at the current offset. -        glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); +        glBindVertexBuffer(index, stream_buffer.GetHandle(), buffer_offset, vertex_array.stride);          ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); @@ -205,7 +196,7 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {      // Helper function for uploading uniform data      const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {          if (has_ARB_direct_state_access) { -            glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); +            glCopyNamedBufferSubData(stream_buffer.GetHandle(), handle, offset, 0, size);          } else {              glBindBuffer(GL_COPY_WRITE_BUFFER, handle);              glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); @@ -456,7 +447,7 @@ void RasterizerOpenGL::DrawArrays() {      const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};      const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; -    state.draw.vertex_buffer = stream_buffer->GetHandle(); +    state.draw.vertex_buffer = stream_buffer.GetHandle();      state.Apply();      size_t buffer_size = CalculateVertexArraysSize(); @@ -471,8 +462,8 @@ void RasterizerOpenGL::DrawArrays() {      u8* buffer_ptr;      GLintptr buffer_offset; -    std::tie(buffer_ptr, buffer_offset) = -        stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); +    std::tie(buffer_ptr, buffer_offset, std::ignore) = +        stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);      u8* offseted_buffer;      std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); @@ -500,7 +491,8 @@ void RasterizerOpenGL::DrawArrays() {      SetupShaders(offseted_buffer, buffer_offset); -    stream_buffer->Unmap(); +    // TODO: Don't use buffer_size here, use the updated buffer_offset. +    stream_buffer.Unmap(buffer_size);      shader_program_manager->ApplyTo(state);      state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bd01dc0ae..19146777c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -139,7 +139,6 @@ private:      /// Syncs the blend state to match the guest state      void SyncBlendState(); -    bool has_ARB_buffer_storage = false;      bool has_ARB_direct_state_access = false;      bool has_ARB_separate_shader_objects = false;      bool has_ARB_vertex_attrib_binding = false; @@ -160,7 +159,7 @@ private:          ssbos;      static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; -    std::unique_ptr<OGLStreamBuffer> stream_buffer; +    OGLStreamBuffer stream_buffer;      OGLBuffer uniform_buffer;      OGLFramebuffer framebuffer; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,174 +9,91 @@  #include "video_core/renderer_opengl/gl_state.h"  #include "video_core/renderer_opengl/gl_stream_buffer.h" -class OrphanBuffer : public OGLStreamBuffer { -public: -    explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} -    ~OrphanBuffer() override; - -private: -    void Create(size_t size, size_t sync_subdivide) override; -    void Release() override; - -    std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; -    void Unmap() override; - -    std::vector<u8> data; -}; - -class StorageBuffer : public OGLStreamBuffer { -public: -    explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} -    ~StorageBuffer() override; - -private: -    void Create(size_t size, size_t sync_subdivide) override; -    void Release() override; - -    std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; -    void Unmap() override; - -    struct Fence { -        OGLSync sync; -        size_t offset; -    }; -    std::deque<Fence> head; -    std::deque<Fence> tail; - -    u8* mapped_ptr; -}; - -OGLStreamBuffer::OGLStreamBuffer(GLenum target) { -    gl_target = target; -} - -GLuint OGLStreamBuffer::GetHandle() const { -    return gl_buffer.handle; -} +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) +    : gl_target(target), buffer_size(size) { +    gl_buffer.Create(); +    glBindBuffer(gl_target, gl_buffer.handle); -std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { -    if (storage_buffer) { -        return std::make_unique<StorageBuffer>(target); +    GLsizeiptr allocate_size = size; +    if (target == GL_ARRAY_BUFFER) { +        // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer +        // read position is near the end and is an out-of-bound access to the vertex buffer. This is +        // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the +        // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the +        // crash. +        allocate_size *= 2;      } -    return std::make_unique<OrphanBuffer>(target); -} -OrphanBuffer::~OrphanBuffer() { -    Release(); +    if (GLAD_GL_ARB_buffer_storage) { +        persistent = true; +        coherent = prefer_coherent; +        GLbitfield flags = +            GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); +        glBufferStorage(gl_target, allocate_size, nullptr, flags); +        mapped_ptr = static_cast<u8*>(glMapBufferRange( +            gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); +    } else { +        glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); +    }  } -void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { -    buffer_pos = 0; -    buffer_size = size; -    data.resize(buffer_size); - -    if (gl_buffer.handle == 0) { -        gl_buffer.Create(); +OGLStreamBuffer::~OGLStreamBuffer() { +    if (persistent) {          glBindBuffer(gl_target, gl_buffer.handle); +        glUnmapBuffer(gl_target);      } - -    glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); -} - -void OrphanBuffer::Release() {      gl_buffer.Release();  } -std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { -    buffer_pos = Common::AlignUp(buffer_pos, alignment); - -    if (buffer_pos + size > buffer_size) { -        Create(std::max(buffer_size, size), 0); -    } - -    mapped_size = size; -    return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} - -void OrphanBuffer::Unmap() { -    glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), -                    static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); -    buffer_pos += mapped_size; -} - -StorageBuffer::~StorageBuffer() { -    Release(); +GLuint OGLStreamBuffer::GetHandle() const { +    return gl_buffer.handle;  } -void StorageBuffer::Create(size_t size, size_t sync_subdivide) { -    if (gl_buffer.handle != 0) -        return; - -    buffer_pos = 0; -    buffer_size = size; -    buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); - -    gl_buffer.Create(); -    glBindBuffer(gl_target, gl_buffer.handle); - -    glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, -                    GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); -    mapped_ptr = reinterpret_cast<u8*>( -        glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), -                         GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); +GLsizeiptr OGLStreamBuffer::GetSize() const { +    return buffer_size;  } -void StorageBuffer::Release() { -    if (gl_buffer.handle == 0) -        return; - -    glUnmapBuffer(gl_target); - -    gl_buffer.Release(); -    head.clear(); -    tail.clear(); -} - -std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { +std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {      ASSERT(size <= buffer_size); +    ASSERT(alignment <= buffer_size); +    mapped_size = size; -    OGLSync sync; - -    buffer_pos = Common::AlignUp(buffer_pos, alignment); -    size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); - -    if (!head.empty() && -        (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { -        ASSERT(head.back().sync.handle == 0); -        head.back().sync.Create(); +    if (alignment > 0) { +        buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);      } +    bool invalidate = false;      if (buffer_pos + size > buffer_size) { -        if (!tail.empty()) { -            std::swap(sync, tail.back().sync); -            tail.clear(); -        } -        std::swap(tail, head);          buffer_pos = 0; -        effective_offset = 0; -    } +        invalidate = true; -    while (!tail.empty() && buffer_pos + size > tail.front().offset) { -        std::swap(sync, tail.front().sync); -        tail.pop_front(); +        if (persistent) { +            glUnmapBuffer(gl_target); +        }      } -    if (sync.handle != 0) { -        glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); -        sync.Release(); +    if (invalidate | !persistent) { +        GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | +                           (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | +                           (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); +        mapped_ptr = static_cast<u8*>( +            glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); +        mapped_offset = buffer_pos;      } -    if (head.empty() || effective_offset > head.back().offset) { -        head.emplace_back(); -        head.back().offset = effective_offset; +    return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); +} + +void OGLStreamBuffer::Unmap(GLsizeiptr size) { +    ASSERT(size <= mapped_size); + +    if (!coherent && size > 0) { +        glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);      } -    mapped_size = size; -    return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} +    if (!persistent) { +        glUnmapBuffer(gl_target); +    } -void StorageBuffer::Unmap() { -    glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), -                             static_cast<GLsizeiptr>(mapped_size)); -    buffer_pos += mapped_size; +    buffer_pos += size;  } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index e78dc5784..45592daaf 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,35 +2,41 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. -#pragma once - -#include <memory> +#include <tuple>  #include <glad/glad.h>  #include "common/common_types.h"  #include "video_core/renderer_opengl/gl_resource_manager.h"  class OGLStreamBuffer : private NonCopyable {  public: -    explicit OGLStreamBuffer(GLenum target); -    virtual ~OGLStreamBuffer() = default; - -public: -    static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); - -    virtual void Create(size_t size, size_t sync_subdivide) = 0; -    virtual void Release() {} +    explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); +    ~OGLStreamBuffer();      GLuint GetHandle() const; +    GLsizeiptr GetSize() const; + +    /* +     * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes +     * and the optional alignment requirement. +     * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. +     * The return values are the pointer to the new chunk, the offset within the buffer, +     * and the invalidation flag for previous chunks. +     * The actual used size must be specified on unmapping the chunk. +     */ +    std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); -    virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; -    virtual void Unmap() = 0; +    void Unmap(GLsizeiptr size); -protected: +private:      OGLBuffer gl_buffer;      GLenum gl_target; -    size_t buffer_pos = 0; -    size_t buffer_size = 0; -    size_t buffer_sync_subdivide = 0; -    size_t mapped_size = 0; +    bool coherent = false; +    bool persistent = false; + +    GLintptr buffer_pos = 0; +    GLsizeiptr buffer_size = 0; +    GLintptr mapped_offset = 0; +    GLsizeiptr mapped_size = 0; +    u8* mapped_ptr = nullptr;  }; | 
