diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 103 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 71 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 2 | 
7 files changed, 173 insertions, 110 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..4489abf61 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {      return type;  } -std::string GlobalMemoryName(const GlobalMemoryBase& base) { -    return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); -} -  class ARBDecompiler final {  public:      explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, @@ -199,6 +195,8 @@ public:      }  private: +    void DefineGlobalMemory(); +      void DeclareHeader();      void DeclareVertex();      void DeclareGeometry(); @@ -228,6 +226,7 @@ private:      std::pair<std::string, std::size_t> BuildCoords(Operation);      std::string BuildAoffi(Operation); +    std::string GlobalMemoryPointer(const GmemNode& gmem);      void Exit();      std::string Assign(Operation); @@ -378,10 +377,8 @@ private:          std::string address;          std::string_view opname;          if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { -            AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), -                    Visit(gmem->GetBaseAddress())); -            address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); -            opname = "ATOMB"; +            address = GlobalMemoryPointer(*gmem); +            opname = "ATOM";          } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {              address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));              opname = "ATOMS"; @@ -456,9 +453,13 @@ private:          shader_source += '\n';      } -    std::string AllocTemporary() { -        max_temporaries = std::max(max_temporaries, num_temporaries + 1); -        return fmt::format("T{}.x", num_temporaries++); +    
std::string AllocLongVectorTemporary() { +        max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); +        return fmt::format("L{}", num_long_temporaries++); +    } + +    std::string AllocLongTemporary() { +        return fmt::format("{}.x", AllocLongVectorTemporary());      }      std::string AllocVectorTemporary() { @@ -466,8 +467,13 @@ private:          return fmt::format("T{}", num_temporaries++);      } +    std::string AllocTemporary() { +        return fmt::format("{}.x", AllocVectorTemporary()); +    } +      void ResetTemporaries() noexcept {          num_temporaries = 0; +        num_long_temporaries = 0;      }      const Device& device; @@ -478,6 +484,11 @@ private:      std::size_t num_temporaries = 0;      std::size_t max_temporaries = 0; +    std::size_t num_long_temporaries = 0; +    std::size_t max_long_temporaries = 0; + +    std::map<GlobalMemoryBase, u32> global_memory_names; +      std::string shader_source;      static constexpr std::string_view ADD_F32 = "ADD.F32"; @@ -784,6 +795,8 @@ private:  ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,                               ShaderType stage, std::string_view identifier)      : device{device}, ir{ir}, registry{registry}, stage{stage} { +    DefineGlobalMemory(); +      AddLine("TEMP RC;");      AddLine("TEMP FSWZA[4];");      AddLine("TEMP FSWZB[4];"); @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {      }  } +void ARBDecompiler::DefineGlobalMemory() { +    u32 binding = 0; +    for (const auto& pair : ir.GetGlobalMemory()) { +        const GlobalMemoryBase base = pair.first; +        global_memory_names.emplace(base, binding); +        ++binding; +    } +} +  void ARBDecompiler::DeclareHeader() {      AddLine("!!NV{}5.0", HeaderStageName(stage));      // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D      AddLine("OPTION NV_internal;");      
AddLine("OPTION NV_gpu_program_fp64;"); -    AddLine("OPTION NV_shader_storage_buffer;");      AddLine("OPTION NV_shader_thread_group;");      if (ir.UsesWarps() && device.HasWarpIntrinsics()) {          AddLine("OPTION NV_shader_thread_shuffle;"); @@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() {  }  void ARBDecompiler::DeclareGlobalMemory() { -    u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; -    for (const auto& pair : ir.GetGlobalMemory()) { -        const auto& base = pair.first; -        AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); -        ++binding; +    const std::size_t num_entries = ir.GetGlobalMemory().size(); +    if (num_entries > 0) { +        const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; +        AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);      }  } @@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() {      for (std::size_t i = 0; i < max_temporaries; ++i) {          AddLine("TEMP T{};", i);      } +    for (std::size_t i = 0; i < max_long_temporaries; ++i) { +        AddLine("LONG TEMP L{};", i); +    }  }  void ARBDecompiler::DeclarePredicates() { @@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) {      if (const auto gmem = std::get_if<GmemNode>(&*node)) {          std::string temporary = AllocTemporary(); -        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), -                Visit(gmem->GetBaseAddress())); -        AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), -                temporary); +        AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));          return temporary;      } @@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {      return fmt::format(", offset({})", temporary);  } +std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { + 
   const u32 binding = global_memory_names.at(gmem.GetDescriptor()); +    const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; + +    const std::string pointer = AllocLongVectorTemporary(); +    std::string temporary = AllocTemporary(); + +    const u32 local_index = binding / 2; +    AddLine("PK64.U {}, c[{}];", pointer, local_index); +    AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), +            Visit(gmem.GetBaseAddress())); +    AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); +    AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); +    return fmt::format("{}.x", pointer); +} +  void ARBDecompiler::Exit() {      if (stage != ShaderType::Fragment) {          AddLine("RET;"); @@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) {          ResetTemporaries();          return {};      } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { -        const std::string temporary = AllocTemporary(); -        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), -                Visit(gmem->GetBaseAddress())); -        AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), -                temporary); +        AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));          ResetTemporaries();          return {};      } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..e866d8f2f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)      : VideoCommon::BufferBlock{cpu_addr, size} {      gl_buffer.Create();      glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); -    if (device.HasVertexBufferUnifiedMemory()) { +    if 
(device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {          glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);          glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);      } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c3fad563c..03e82c599 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) {      (state ? glEnable : glDisable)(cap);  } +void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { +    if (num_entries == 0) { +        return; +    } +    if (num_entries % 2 == 1) { +        pointers[num_entries] = 0; +    } +    const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); +    glProgramLocalParametersI4uivNV(target, 0, num_vectors, +                                    reinterpret_cast<const GLuint*>(pointers)); +} +  } // Anonymous namespace  RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {  void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {      MICROPROFILE_SCOPE(OpenGL_Shader);      auto& gpu = system.GPU().Maxwell3D(); -    std::size_t num_ssbos = 0;      u32 clip_distances = 0;      for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {          }          // Currently this stages are not supported in the OpenGL backend. 
-        // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL -        if (program == Maxwell::ShaderProgram::TesselationControl) { +        // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL +        if (program == Maxwell::ShaderProgram::TesselationControl || +            program == Maxwell::ShaderProgram::TesselationEval) {              continue; -        } else if (program == Maxwell::ShaderProgram::TesselationEval) { -            continue; -        } - -        Shader* shader = shader_cache.GetStageProgram(program, async_shaders); - -        if (device.UseAssemblyShaders()) { -            // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this -            // all stages share the same bindings. -            const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); -            ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); -            num_ssbos += num_stage_ssbos;          } -        // Stage indices are 0 - 5 -        const std::size_t stage = index == 0 ? 0 : index - 1; -        SetupDrawConstBuffers(stage, shader); -        SetupDrawGlobalMemory(stage, shader); -        SetupDrawTextures(stage, shader); -        SetupDrawImages(stage, shader); +        Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);          const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;          switch (program) { @@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {                                shader_config.enable.Value(), shader_config.offset);          } +        // Stage indices are 0 - 5 +        const std::size_t stage = index == 0 ? 0 : index - 1; +        SetupDrawConstBuffers(stage, shader); +        SetupDrawGlobalMemory(stage, shader); +        SetupDrawTextures(stage, shader); +        SetupDrawImages(stage, shader); +          // Workaround for Intel drivers.     
     // When a clip distance is enabled but not set in the shader it crops parts of the screen          // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the @@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {      current_cbuf = 0;      auto kernel = shader_cache.GetComputeKernel(code_addr); +    program_manager.BindCompute(kernel->GetHandle()); +      SetupComputeTextures(kernel);      SetupComputeImages(kernel); @@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {      buffer_cache.Unmap();      const auto& launch_desc = system.GPU().KeplerCompute().launch_description; -    program_manager.BindCompute(kernel->GetHandle());      glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);      ++num_queued_commands;  } @@ -1023,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,  }  void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { +    static constexpr std::array TARGET_LUT = { +        GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, +        GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, +    }; +      auto& gpu{system.GPU()};      auto& memory_manager{gpu.MemoryManager()}; -    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; +    const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; +    const auto& entries{shader->GetEntries().global_memory_entries}; + +    std::array<GLuint64EXT, 32> pointers; +    ASSERT(entries.size() < pointers.size()); -    u32 binding = -        device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; -    for (const auto& entry : shader->GetEntries().global_memory_entries) { +    const bool assembly_shaders = device.UseAssemblyShaders(); +    u32 binding = assembly_shaders ? 
0 : device.GetBaseBindings(stage_index).shader_storage_buffer; +    for (const auto& entry : entries) {          const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};          const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};          const u32 size{memory_manager.Read<u32>(addr + 8)}; -        SetupGlobalMemory(binding++, entry, gpu_addr, size); +        SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); +        ++binding; +    } +    if (assembly_shaders) { +        UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());      }  }  void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {      auto& gpu{system.GPU()};      auto& memory_manager{gpu.MemoryManager()}; -    const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; +    const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; +    const auto& entries{kernel->GetEntries().global_memory_entries}; + +    std::array<GLuint64EXT, 32> pointers; +    ASSERT(entries.size() < pointers.size());      u32 binding = 0; -    for (const auto& entry : kernel->GetEntries().global_memory_entries) { -        const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; -        const auto gpu_addr{memory_manager.Read<u64>(addr)}; -        const auto size{memory_manager.Read<u32>(addr + 8)}; -        SetupGlobalMemory(binding++, entry, gpu_addr, size); +    for (const auto& entry : entries) { +        const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; +        const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; +        const u32 size{memory_manager.Read<u32>(addr + 8)}; +        SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); +        ++binding; +    } +    if (device.UseAssemblyShaders()) { +        UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());      }  }  void 
RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, -                                         GPUVAddr gpu_addr, std::size_t size) { -    const auto alignment{device.GetShaderStorageBufferAlignment()}; +                                         GPUVAddr gpu_addr, std::size_t size, +                                         GLuint64EXT* pointer) { +    const std::size_t alignment{device.GetShaderStorageBufferAlignment()};      const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); -    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, -                      static_cast<GLsizeiptr>(size)); +    if (device.UseAssemblyShaders()) { +        *pointer = info.address + info.offset; +    } else { +        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, +                          static_cast<GLsizeiptr>(size)); +    }  }  void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a95646936..ccc6f50f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -124,9 +124,9 @@ private:      /// Configures the current global memory entries to use for the kernel invocation.      void SetupComputeGlobalMemory(Shader* kernel); -    /// Configures a constant buffer. +    /// Configures a global memory buffer.      void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, -                           std::size_t size); +                           std::size_t size, GLuint64EXT* pointer);      /// Configures the current textures to use for the draw command.      
void SetupDrawTextures(std::size_t stage_index, Shader* shader); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -11,8 +11,30 @@  namespace OpenGL { -ProgramManager::ProgramManager(const Device& device) { -    use_assembly_programs = device.UseAssemblyShaders(); +namespace { + +void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { +    if (current == old) { +        return; +    } +    if (current == 0) { +        if (enabled) { +            enabled = false; +            glDisable(stage); +        } +        return; +    } +    if (!enabled) { +        enabled = true; +        glEnable(stage); +    } +    glBindProgramARB(stage, current); +} + +} // Anonymous namespace + +ProgramManager::ProgramManager(const Device& device) +    : use_assembly_programs{device.UseAssemblyShaders()} {      if (use_assembly_programs) {          glEnable(GL_COMPUTE_PROGRAM_NV);      } else { @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {  }  void ProgramManager::BindGraphicsPipeline() { -    if (use_assembly_programs) { -        UpdateAssemblyPrograms(); -    } else { +    if (!use_assembly_programs) {          UpdateSourcePrograms();      }  } @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {      }  } -void ProgramManager::UpdateAssemblyPrograms() { -    const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { -        if (current == old) { -            return; -        } -        if (current == 0) { -            if (enabled) { -                enabled = false; -                glDisable(stage); -            } -            return; -        } -        if (!enabled) { -            enabled = true; -            glEnable(stage); -        } -        glBindProgramARB(stage, 
current); -    }; +void ProgramManager::UseVertexShader(GLuint program) { +    if (use_assembly_programs) { +        BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); +    } +    current_state.vertex = program; +} -    update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); -    update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, -                 old_state.geometry); -    update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, -                 old_state.fragment); +void ProgramManager::UseGeometryShader(GLuint program) { +    if (use_assembly_programs) { +        BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.geometry, geometry_enabled); +    } +    current_state.geometry = program; +} -    old_state = current_state; +void ProgramManager::UseFragmentShader(GLuint program) { +    if (use_assembly_programs) { +        BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.fragment, fragment_enabled); +    } +    current_state.fragment = program;  }  void ProgramManager::UpdateSourcePrograms() { diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -45,17 +45,9 @@ public:      /// Rewinds BindHostPipeline state changes.      
void RestoreGuestPipeline(); -    void UseVertexShader(GLuint program) { -        current_state.vertex = program; -    } - -    void UseGeometryShader(GLuint program) { -        current_state.geometry = program; -    } - -    void UseFragmentShader(GLuint program) { -        current_state.fragment = program; -    } +    void UseVertexShader(GLuint program); +    void UseGeometryShader(GLuint program); +    void UseFragmentShader(GLuint program);  private:      struct PipelineState { @@ -64,9 +56,6 @@ private:          GLuint fragment = 0;      }; -    /// Update NV_gpu_program5 programs. -    void UpdateAssemblyPrograms(); -      /// Update GLSL programs.      void UpdateSourcePrograms(); diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver      mapped_ptr = static_cast<u8*>(          glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); -    if (device.HasVertexBufferUnifiedMemory()) { +    if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {          glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);          glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);      }  | 
