diff options
| author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2020-02-17 18:10:23 -0400 | 
|---|---|---|
| committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2020-04-22 11:36:10 -0400 | 
| commit | 487379c593bcaf3787ede187c5d44f7923b54dc9 (patch) | |
| tree | b66c5c541a55be6d4b76b78c07be11731a7cb400 | |
| parent | ed7e9657120faea849af2933e539c72bc961c2a9 (diff) | |
OpenGL: Implement Fencing backend.
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 7 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 32 | 
12 files changed, 94 insertions, 19 deletions
| diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2605c3b42..c297bc31b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {      }  } -void Maxwell3D::ReleaseFences() { -    for (const auto pair : delay_fences) { -        const auto [addr, payload] = pair; -        memory_manager.Write<u32>(addr, static_cast<u32>(payload)); -    } -    delay_fences.clear(); -} -  void Maxwell3D::ProcessQueryGet() {      // TODO(Subv): Support the other query units.      ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, @@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {      switch (regs.query.query_get.operation) {      case Regs::QueryOperation::Release: { -        rasterizer.FlushCommands(); -        rasterizer.SyncGuestHost();          const u64 result = regs.query.query_sequence; -        delay_fences.emplace_back(regs.query.QueryAddress(), result); +        if (regs.query.query_get.fence == 1) { +            rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result)); +        } else { +            StampQueryResult(result, regs.query.query_get.short_query == 0); +        }          break;      }      case Regs::QueryOperation::Acquire: diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a93827ec..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1427,8 +1427,6 @@ public:          Tables tables{};      } dirty; -    void ReleaseFences(); -  private:      void InitializeRegisterDefaults(); @@ -1469,8 +1467,6 @@ private:      std::array<u8, Regs::NUM_REGS> dirty_pointers{}; -    std::vector<std::pair<GPUVAddr, u64>> delay_fences; -      /// Retrieves information about a specific TIC entry from the TIC buffer.      Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 71ddfbd26..d05b6a9d2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {  }  void GPU::OnCommandListEnd() { -    maxwell_3d->ReleaseFences(); +    renderer.Rasterizer().ReleaseFences();  }  // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence  // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b88445634..fa9991c87 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -157,7 +157,7 @@ public:      void FlushCommands();      void SyncGuestHost(); -    void OnCommandListEnd(); +    virtual void OnCommandListEnd();      /// Returns a reference to the Maxwell3D GPU engine.      Engines::Maxwell3D& Maxwell3D(); diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 20e73a37e..53305ab43 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {      gpu_thread.WaitIdle();  } +void GPUAsynch::OnCommandListEnd() { +    gpu_thread.OnCommandListEnd(); +} +  } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 03fd0eef0..517658612 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -32,6 +32,8 @@ public:      void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void WaitIdle() const override; +    void OnCommandListEnd() override; +  protected:      void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1994d3bb4..251a9d911 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic              dma_pusher.DispatchCalls();          } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {              renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); +        } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { +            renderer.Rasterizer().ReleaseFences();          } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {              renderer.Rasterizer().FlushRegion(data->addr, data->size);          } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { @@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {      }  } +void ThreadManager::OnCommandListEnd() { +    PushCommand(OnCommandListEndCommand()); +} +  u64 ThreadManager::PushCommand(CommandData&& command_data) {      const u64 fence{++state.last_fence};      state.queue.Push(CommandDataContainer(std::move(command_data), fence)); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cd74ad330..9d0877921 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {      u64 size;  }; +/// Command to signal to the GPU thread that processing has ended +struct OnCommandListEndCommand final {}; +  using CommandData =      std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, -                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; +                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;  struct CommandDataContainer {      CommandDataContainer() = default; @@ -122,6 +125,8 @@ public:      // Wait until the gpu thread is idle.      void WaitIdle() const; +    void OnCommandListEnd(); +  private:      /// Pushes a command to be executed by the GPU thread      u64 PushCommand(CommandData&& command_data); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0d05a3fc7..72f65b166 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,6 +49,14 @@ public:      /// Records a GPU query and caches it      virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; +    virtual void SignalFence(GPUVAddr addr, u32 value) { + +    } + +    virtual void ReleaseFences() { + +    } +      /// Notify rasterizer that all caches should be flushed to Switch memory      virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 988eaeaa5..93bb33e8c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {      buffer_cache.SyncGuestHost();  } +void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) { +    if (!fences.empty()) { +        const std::pair<GPUVAddr, u32>& current_fence = fences.front(); +        const auto [address, payload] = current_fence; +        texture_cache.PopAsyncFlushes(); +        auto& gpu{system.GPU()}; +        auto& memory_manager{gpu.MemoryManager()}; +        memory_manager.Write<u32>(address, payload); +        fences.pop_front(); +    } +    fences.emplace_back(addr, value); +    texture_cache.CommitAsyncFlushes(); +    FlushCommands(); +    SyncGuestHost(); +} + +void RasterizerOpenGL::ReleaseFences() { +    while (!fences.empty()) { +        const std::pair<GPUVAddr, u32>& current_fence = fences.front(); +        const auto [address, payload] = current_fence; +        texture_cache.PopAsyncFlushes(); +        auto& gpu{system.GPU()}; +        auto& memory_manager{gpu.MemoryManager()}; +        memory_manager.Write<u32>(address, payload); +        fences.pop_front(); +    } +} +  void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {      if (Settings::IsGPULevelExtreme()) {          FlushRegion(addr, size); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a870024c6..486a154ad 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -69,6 +69,8 @@ public:      void InvalidateRegion(VAddr addr, u64 size) override;      void OnCPUWrite(VAddr addr, u64 size) override;      void SyncGuestHost() override; +    void SignalFence(GPUVAddr addr, u32 value) override; +    void ReleaseFences() override;      void FlushAndInvalidateRegion(VAddr addr, u64 size) override;      void FlushCommands() override;      void TickFrame() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d8c8390bb..6629c59ed 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -238,7 +238,7 @@ public:              surface->MarkAsRenderTarget(false, NO_RT);              const auto& cr_params = surface->GetSurfaceParams();              if (!cr_params.is_tiled) { -                FlushSurface(surface); +                AsyncFlushSurface(surface);              }          }          render_targets[index].target = surface_view.first; @@ -317,6 +317,26 @@ public:          return ++ticks;      } +    void CommitAsyncFlushes() { +        commited_flushes.push_back(uncommited_flushes); +        uncommited_flushes.reset(); +    } + +    void PopAsyncFlushes() { +        if (commited_flushes.empty()) { +            return; +        } +        auto& flush_list = commited_flushes.front(); +        if (!flush_list) { +            commited_flushes.pop_front(); +            return; +        } +        for (TSurface& surface : *flush_list) { +            FlushSurface(surface); +        } +        commited_flushes.pop_front(); +    } +  protected:      explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,                            bool is_astc_supported) @@ -1152,6 +1172,13 @@ private:          TView view;      }; +    void AsyncFlushSurface(TSurface& surface) { +        if (!uncommited_flushes) { +            uncommited_flushes = std::make_shared<std::list<TSurface>>(); +        } +        uncommited_flushes->push_back(surface); +    } +      VideoCore::RasterizerInterface& rasterizer;      FormatLookupTable format_lookup_table; @@ -1198,6 +1225,9 @@ private:      std::list<TSurface> marked_for_unregister; +    std::shared_ptr<std::list<TSurface>> uncommited_flushes{}; +    std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes; +      StagingCache staging_cache;      std::recursive_mutex mutex;  }; | 
