diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 5 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 | 
9 files changed, 60 insertions, 6 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 28e8c13aa..8b9c548cc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB  void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {      MICROPROFILE_SCOPE(ProcessCommandLists); +    // On entering GPU code, assume all memory may be touched by the ARM core. +    maxwell_3d->dirty_flags.OnMemoryWrite(); +      auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {          LOG_TRACE(HW_GPU,                    "Processing method {:08X} on subchannel {} value " diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 74e44c7fe..8d0700d13 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -2,8 +2,10 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. +#include "core/core.h"  #include "core/memory.h"  #include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/textures/decoders.h" @@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {      u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);      if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { +        // All copies here update the main memory, so mark all rasterizer states as invalid. +        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +          rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);          // We have to invalidate the destination region to evict any outdated surfaces from the          // cache. 
We do this before actually writing the new data because the destination address diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 585290d9f..2adbc9eaf 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -3,8 +3,10 @@  // Refer to the license.txt file included.  #include "common/logging/log.h" +#include "core/core.h"  #include "core/memory.h"  #include "video_core/engines/kepler_memory.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/rasterizer_interface.h"  namespace Tegra::Engines { @@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {      rasterizer.InvalidateRegion(dest_address, sizeof(u32));      Memory::Write32(dest_address, data); +    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();      state.write_offset++;  } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 6de07ea56..1772882b2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -123,10 +123,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {      if (regs.reg_array[method] != value) {          regs.reg_array[method] = value; +        // Vertex format          if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&              method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {              dirty_flags.vertex_attrib_format = true;          } + +        // Vertex buffer +        if (method >= MAXWELL3D_REG_INDEX(vertex_array) && +            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { +            dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); +        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && +                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { +            dirty_flags.vertex_array |= +       
         1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); +        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && +                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { +            dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); +        }      }      switch (method) { @@ -258,6 +272,7 @@ void Maxwell3D::ProcessQueryGet() {              query_result.timestamp = CoreTiming::GetTicks();              Memory::WriteBlock(*address, &query_result, sizeof(query_result));          } +        dirty_flags.OnMemoryWrite();          break;      }      default: @@ -334,6 +349,7 @@ void Maxwell3D::ProcessCBData(u32 value) {          memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);      Memory::Write32(*address, value); +    dirty_flags.OnMemoryWrite();      // Increment the current buffer position.      regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 91ca57883..0848b7121 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1014,6 +1014,11 @@ public:      struct DirtyFlags {          bool vertex_attrib_format = true; +        u32 vertex_array = 0xFFFFFFFF; + +        void OnMemoryWrite() { +            vertex_array = 0xFFFFFFFF; +        }      };      DirtyFlags dirty_flags; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index b8a78cf82..a34e884fe 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -2,7 +2,9 @@  // Licensed under GPLv2 or any later version  // Refer to the license.txt file included. 
+#include "core/core.h"  #include "core/memory.h" +#include "video_core/engines/maxwell_3d.h"  #include "video_core/engines/maxwell_dma.h"  #include "video_core/rasterizer_interface.h"  #include "video_core/textures/decoders.h" @@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {          return;      } +    // All copies here update the main memory, so mark all rasterizer states as invalid. +    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +      if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {          // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D          // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 075192c3f..46a6c0308 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s      return std::make_tuple(uploaded_ptr, uploaded_offset);  } -void OGLBufferCache::Map(std::size_t max_size) { +bool OGLBufferCache::Map(std::size_t max_size) {      bool invalidate;      std::tie(buffer_ptr, buffer_offset_base, invalidate) =          stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); @@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {      if (invalidate) {          InvalidateAll();      } +    return invalidate;  }  void OGLBufferCache::Unmap() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 91fca3f6c..c11acfb79 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -50,7 +50,7 @@ public:      /// Reserves memory to be used by host's CPU. Returns mapped address and offset.      
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); -    void Map(std::size_t max_size); +    bool Map(std::size_t max_size);      void Unmap();      GLuint GetHandle() const; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54cc47a9b..cb0d0c16a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -183,15 +183,25 @@ void RasterizerOpenGL::SetupVertexFormat() {      }      state.draw.vertex_array = VAO.handle;      state.ApplyVertexBufferState(); + +    // Rebinding the VAO invalidates the vertex buffer bindings. +    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;  }  void RasterizerOpenGL::SetupVertexBuffer() { -    MICROPROFILE_SCOPE(OpenGL_VB); -    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); +    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();      const auto& regs = gpu.regs; +    if (!gpu.dirty_flags.vertex_array) +        return; + +    MICROPROFILE_SCOPE(OpenGL_VB); +      // Upload all guest vertex arrays sequentially to our buffer      for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { +        if (~gpu.dirty_flags.vertex_array & (1u << index)) +            continue; +          const auto& vertex_array = regs.vertex_array[index];          if (!vertex_array.IsEnabled())              continue; @@ -218,6 +228,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {      // Implicit set by glBindVertexBuffer. Stupid glstate handling...      
state.draw.vertex_buffer = buffer_cache.GetHandle(); + +    gpu.dirty_flags.vertex_array = 0;  }  DrawParameters RasterizerOpenGL::SetupDraw() { @@ -575,7 +587,7 @@ void RasterizerOpenGL::DrawArrays() {          return;      MICROPROFILE_SCOPE(OpenGL_Drawing); -    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); +    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();      const auto& regs = gpu.regs;      ScopeAcquireGLContext acquire_context{emu_window}; @@ -626,7 +638,11 @@ void RasterizerOpenGL::DrawArrays() {      // Add space for at least 18 constant buffers      buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); -    buffer_cache.Map(buffer_size); +    bool invalidate = buffer_cache.Map(buffer_size); +    if (invalidate) { +        // As all cached buffers are invalidated, mark every vertex array dirty for re-upload. +        gpu.dirty_flags.vertex_array = 0xFFFFFFFF; +    }      SetupVertexFormat();      SetupVertexBuffer();  | 
