diff options
| author | bunnei <bunneidev@gmail.com> | 2019-07-21 00:59:52 -0400 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-07-21 00:59:52 -0400 | 
| commit | 27e10e0442dfd347387c6eaf148b27f5cc38bcaf (patch) | |
| tree | c078fc3f0e62e55fc92a0c8b582666deece0a968 | |
| parent | 6738fb5fef789d8e6674459b1bf656d0f983b159 (diff) | |
| parent | 7a35178ee2c8ce60c87654ed2d80cc76abb0380b (diff) | |
Merge pull request #2735 from FernandoS27/pipeline-rework
Rework Dirty Flags in GPU Pipeline, Optimize CBData and Redo Clearing mechanism
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 261 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 89 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 178 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 41 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 13 | 
14 files changed, 528 insertions, 116 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3175579cc..bd036cbe8 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {      MICROPROFILE_SCOPE(DispatchCalls);      // On entering GPU code, assume all memory may be touched by the ARM core. -    gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); +    gpu.Maxwell3D().dirty.OnMemoryWrite();      dma_pushbuffer_subindex = 0; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..e3d5fb8a9 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {          const bool is_last_call = method_call.IsLastCall();          upload_state.ProcessData(method_call.argument, is_last_call);          if (is_last_call) { -            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +            system.GPU().Maxwell3D().dirty.OnMemoryWrite();          }          break;      } diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 0561f676c..44279de00 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {          const bool is_last_call = method_call.IsLastCall();          upload_state.ProcessData(method_call.argument, is_last_call);          if (is_last_call) { -            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +            system.GPU().Maxwell3D().dirty.OnMemoryWrite();          }          break;      } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..fe9fc0278 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,6 +22,7 @@ 
Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste                       MemoryManager& memory_manager)      : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},        macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { +    InitDirtySettings();      InitializeRegisterDefaults();  } @@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {      regs.stencil_back_func_mask = 0xFFFFFFFF;      regs.stencil_back_mask = 0xFFFFFFFF; +    regs.depth_test_func = Regs::ComparisonOp::Always; +    regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; +    regs.cull.cull_face = Regs::Cull::CullFace::Back; +      // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a      // register carrying a default value. Assume it's OpenGL's default (1).      regs.point_size = 1.0f; @@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {      regs.rt_separate_frag_data = 1;  } +#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) + +void Maxwell3D::InitDirtySettings() { +    const auto set_block = [this](const u32 start, const u32 range, const u8 position) { +        const auto start_itr = dirty_pointers.begin() + start; +        const auto end_itr = start_itr + range; +        std::fill(start_itr, end_itr, position); +    }; +    dirty.regs.fill(true); + +    // Init Render Targets +    constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); +    constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); +    constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; +    u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); +    for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { +        set_block(rt_reg, registers_per_rt, rt_dirty_reg); +        rt_dirty_reg++; +    } +    constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); +    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = 
depth_buffer_flag; +    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; +    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; +    constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); +    constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); +    set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); + +    // Init Vertex Arrays +    constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); +    constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); +    constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; +    u32 va_reg = DIRTY_REGS_POS(vertex_array); +    u32 vi_reg = DIRTY_REGS_POS(vertex_instance); +    for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; +         vertex_reg += vertex_array_size) { +        set_block(vertex_reg, 3, va_reg); +        // The divisor concerns vertex array instances +        dirty_pointers[vertex_reg + 3] = vi_reg; +        va_reg++; +        vi_reg++; +    } +    constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); +    constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); +    constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; +    va_reg = DIRTY_REGS_POS(vertex_array); +    for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; +         vertex_reg += vertex_limit_size) { +        set_block(vertex_reg, vertex_limit_size, va_reg); +        va_reg++; +    } +    constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); +    constexpr u32 vertex_instance_size = +        sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); +    constexpr u32 vertex_instance_end = +        vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; +    vi_reg = DIRTY_REGS_POS(vertex_instance); +    for (u32 vertex_reg = 
vertex_instance_start; vertex_reg < vertex_instance_end; +         vertex_reg += vertex_instance_size) { +        set_block(vertex_reg, vertex_instance_size, vi_reg); +        vi_reg++; +    } +    set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), +              DIRTY_REGS_POS(vertex_attrib_format)); + +    // Init Shaders +    constexpr u32 shader_registers_count = +        sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); +    set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, +              DIRTY_REGS_POS(shaders)); + +    // State + +    // Viewport +    constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); +    constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); +    constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); +    set_block(viewport_start, viewport_size, viewport_dirty_reg); +    constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); +    constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); +    set_block(view_volume_start, view_volume_size, viewport_dirty_reg); + +    // Viewport transformation +    constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); +    constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); +    set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); + +    // Cullmode +    constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); +    constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); +    set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); + +    // Screen y control +    dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); + +    // Primitive Restart +    constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); +    constexpr u32 primitive_restart_size = 
sizeof(regs.primitive_restart) / sizeof(u32); +    set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); + +    // Depth Test +    constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); +    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; + +    // Stencil Test +    constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = 
stencil_test_dirty_reg; + +    // Color Mask +    constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); +    dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; +    set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), +              color_mask_dirty_reg); +    // Blend State +    constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); +    set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), +              blend_state_dirty_reg); +    dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; +    set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); +    set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), +              blend_state_dirty_reg); + +    // Scissor State +    constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); +    set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), +              scissor_test_dirty_reg); + +    // Polygon Offset +    constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; +    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; +} +  void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {      // Reset the current macro.      
executing_macro = 0; @@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {      const u32 method = method_call.method; +    if (method == cb_data_state.current) { +        regs.reg_array[method] = method_call.argument; +        ProcessCBData(method_call.argument); +        return; +    } else if (cb_data_state.current != null_cb_data) { +        FinishCBData(); +    } +      // It is an error to write to a register other than the current macro's ARG register before it      // has finished execution.      if (executing_macro != 0) { @@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {      if (regs.reg_array[method] != method_call.argument) {          regs.reg_array[method] = method_call.argument; -        // Color buffers -        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); -        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); -        if (method >= first_rt_reg && -            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { -            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; -            dirty_flags.color_buffer.set(rt_index); -        } - -        // Zeta buffer -        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); -        if (method == MAXWELL3D_REG_INDEX(zeta_enable) || -            method == MAXWELL3D_REG_INDEX(zeta_width) || -            method == MAXWELL3D_REG_INDEX(zeta_height) || -            (method >= MAXWELL3D_REG_INDEX(zeta) && -             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { -            dirty_flags.zeta_buffer = true; -        } - -        // Shader -        constexpr u32 shader_registers_count = -            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); -        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && -            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { -            
dirty_flags.shaders = true; -        } - -        // Vertex format -        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && -            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { -            dirty_flags.vertex_attrib_format = true; -        } - -        // Vertex buffer -        if (method >= MAXWELL3D_REG_INDEX(vertex_array) && -            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) { -            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); -        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && -                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) { -            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); -        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && -                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) { -            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); +        const std::size_t dirty_reg = dirty_pointers[method]; +        if (dirty_reg) { +            dirty.regs[dirty_reg] = true; +            if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && +                dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { +                dirty.vertex_array_buffers = true; +            } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && +                       dirty_reg < DIRTY_REGS_POS(vertex_instances)) { +                dirty.vertex_instances = true; +            } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && +                       dirty_reg < DIRTY_REGS_POS(render_settings)) { +                dirty.render_settings = true; +            }          }      } @@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {      case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):      case 
MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):      case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { -        ProcessCBData(method_call.argument); +        StartCBData(method);          break;      }      case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { @@ -261,7 +397,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {          const bool is_last_call = method_call.IsLastCall();          upload_state.ProcessData(method_call.argument, is_last_call);          if (is_last_call) { -            dirty_flags.OnMemoryWrite(); +            dirty.OnMemoryWrite();          }          break;      } @@ -333,7 +469,6 @@ void Maxwell3D::ProcessQueryGet() {              query_result.timestamp = system.CoreTiming().GetTicks();              memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));          } -        dirty_flags.OnMemoryWrite();          break;      }      default: @@ -405,23 +540,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {  }  void Maxwell3D::ProcessCBData(u32 value) { +    const u32 id = cb_data_state.id; +    cb_data_state.buffer[id][cb_data_state.counter] = value; +    // Increment the current buffer position. +    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; +    cb_data_state.counter++; +} + +void Maxwell3D::StartCBData(u32 method) { +    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]); +    cb_data_state.start_pos = regs.const_buffer.cb_pos; +    cb_data_state.id = method - first_cb_data; +    cb_data_state.current = method; +    cb_data_state.counter = 0; +    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); +} + +void Maxwell3D::FinishCBData() {      // Write the input value to the current const buffer at the current position.      const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();      ASSERT(buffer_address != 0);      // Don't allow writing past the end of the buffer. 
-    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); +    ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); -    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; +    const GPUVAddr address{buffer_address + cb_data_state.start_pos}; +    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; -    u8* ptr{memory_manager.GetPointer(address)}; -    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); -    memory_manager.Write<u32>(address, value); +    const u32 id = cb_data_state.id; +    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); +    dirty.OnMemoryWrite(); -    dirty_flags.OnMemoryWrite(); - -    // Increment the current buffer position. -    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; +    cb_data_state.id = null_cb_data; +    cb_data_state.current = null_cb_data;  }  Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8d15c8a48..ac300bf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1124,23 +1124,77 @@ public:      State state{}; -    struct DirtyFlags { -        std::bitset<8> color_buffer{0xFF}; -        std::bitset<32> vertex_array{0xFFFFFFFF}; +    struct DirtyRegs { +        static constexpr std::size_t NUM_REGS = 256; +        union { +            struct { +                bool null_dirty; + +                // Vertex Attributes +                bool vertex_attrib_format; + +                // Vertex Arrays +                std::array<bool, 32> vertex_array; + +                bool vertex_array_buffers; + +                // Vertex Instances +                std::array<bool, 32> vertex_instance; + +                bool vertex_instances; + +                // Render Targets +                std::array<bool, 8> render_target; +                bool depth_buffer; + +            
    bool render_settings; + +                // Shaders +                bool shaders; + +                // Rasterizer State +                bool viewport; +                bool clip_coefficient; +                bool cull_mode; +                bool primitive_restart; +                bool depth_test; +                bool stencil_test; +                bool blend_state; +                bool scissor_test; +                bool transform_feedback; +                bool color_mask; +                bool polygon_offset; -        bool vertex_attrib_format = true; -        bool zeta_buffer = true; -        bool shaders = true; +                // Complementary +                bool viewport_transform; +                bool screen_y_control; + +                bool memory_general; +            }; +            std::array<bool, NUM_REGS> regs; +        }; + +        void ResetVertexArrays() { +            vertex_array.fill(true); +            vertex_array_buffers = true; +        } + +        void ResetRenderTargets() { +            depth_buffer = true; +            render_target.fill(true); +            render_settings = true; +        }          void OnMemoryWrite() { -            zeta_buffer = true;              shaders = true; -            color_buffer.set(); -            vertex_array.set(); +            memory_general = true; +            ResetRenderTargets(); +            ResetVertexArrays();          } -    }; -    DirtyFlags dirty_flags; +    } dirty{}; + +    std::array<u8, Regs::NUM_REGS> dirty_pointers{};      /// Reads a register value located at the input method address      u32 GetRegisterValue(u32 method) const; @@ -1192,6 +1246,15 @@ private:      /// Interpreter for the macro codes uploaded to the GPU.      
MacroInterpreter macro_interpreter; +    static constexpr u32 null_cb_data = 0xFFFFFFFF; +    struct { +        std::array<std::array<u32, 0x4000>, 16> buffer; +        u32 current{null_cb_data}; +        u32 id{null_cb_data}; +        u32 start_pos{}; +        u32 counter{}; +    } cb_data_state; +      Upload::State upload_state;      /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -1200,6 +1263,8 @@ private:      /// Retrieves information about a specific TSC entry from the TSC buffer.      Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; +    void InitDirtySettings(); +      /**       * Call a macro on this engine.       * @param method Method to call @@ -1223,7 +1288,9 @@ private:      void ProcessSyncPoint();      /// Handles a write to the CB_DATA[i] register. +    void StartCBData(u32 method);      void ProcessCBData(u32 value); +    void FinishCBData();      /// Handles a write to the CB_BIND register.      void ProcessCBBind(Regs::ShaderStage stage); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..b5f57e534 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {      }      // All copies here update the main memory, so mark all rasterizer states as invalid. 
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); +    system.GPU().Maxwell3D().dirty.OnMemoryWrite();      if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {          // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0bb5c068c..0432a9e10 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -105,6 +105,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind      shader_program_manager = std::make_unique<GLShader::ProgramManager>();      state.draw.shader_program = 0;      state.Apply(); +    clear_framebuffer.Create();      LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");      CheckExtensions(); @@ -124,10 +125,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {      auto& gpu = system.GPU().Maxwell3D();      const auto& regs = gpu.regs; -    if (!gpu.dirty_flags.vertex_attrib_format) { +    if (!gpu.dirty.vertex_attrib_format) {          return state.draw.vertex_array;      } -    gpu.dirty_flags.vertex_attrib_format = false; +    gpu.dirty.vertex_attrib_format = false;      MICROPROFILE_SCOPE(OpenGL_VAO); @@ -181,7 +182,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {      }      // Rebinding the VAO invalidates the vertex buffer bindings. 
-    gpu.dirty_flags.vertex_array.set(); +    gpu.dirty.ResetVertexArrays();      state.draw.vertex_array = vao_entry.handle;      return vao_entry.handle; @@ -189,17 +190,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {  void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {      auto& gpu = system.GPU().Maxwell3D(); -    const auto& regs = gpu.regs; - -    if (gpu.dirty_flags.vertex_array.none()) +    if (!gpu.dirty.vertex_array_buffers)          return; +    gpu.dirty.vertex_array_buffers = false; + +    const auto& regs = gpu.regs;      MICROPROFILE_SCOPE(OpenGL_VB);      // Upload all guest vertex arrays sequentially to our buffer      for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { -        if (!gpu.dirty_flags.vertex_array[index]) +        if (!gpu.dirty.vertex_array[index])              continue; +        gpu.dirty.vertex_array[index] = false; +        gpu.dirty.vertex_instance[index] = false;          const auto& vertex_array = regs.vertex_array[index];          if (!vertex_array.IsEnabled()) @@ -224,8 +228,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {              glVertexArrayBindingDivisor(vao, index, 0);          }      } +} + +void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { +    auto& gpu = system.GPU().Maxwell3D(); + +    if (!gpu.dirty.vertex_instances) +        return; +    gpu.dirty.vertex_instances = false; -    gpu.dirty_flags.vertex_array.reset(); +    const auto& regs = gpu.regs; +    // Upload all guest vertex arrays sequentially to our buffer +    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { +        if (!gpu.dirty.vertex_instance[index]) +            continue; + +        gpu.dirty.vertex_instance[index] = false; + +        if (regs.instanced_arrays.IsInstancingEnabled(index) && +            regs.vertex_array[index].divisor != 0) { +            // Enable vertex buffer instancing with the specified divisor. 
+            glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); +        } else { +            // Disable the vertex buffer instancing. +            glVertexArrayBindingDivisor(vao, index, 0); +        } +    }  }  GLintptr RasterizerOpenGL::SetupIndexBuffer() { @@ -341,7 +369,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {      SyncClipEnabled(clip_distances); -    gpu.dirty_flags.shaders = false; +    gpu.dirty.shaders = false;  }  std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -424,13 +452,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(      const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,                                                   single_color_target}; -    if (fb_config_state == current_framebuffer_config_state && -        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { +    if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {          // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or          // single color targets). 
This is done because the guest registers may not change but the          // host framebuffer may contain different attachments          return current_depth_stencil_usage;      } +    gpu.dirty.render_settings = false;      current_framebuffer_config_state = fb_config_state;      texture_cache.GuardRenderTargets(true); @@ -519,13 +547,65 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(      return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};  } +void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, +                                                 bool using_depth_fb, bool using_stencil_fb) { +    auto& gpu = system.GPU().Maxwell3D(); +    const auto& regs = gpu.regs; + +    texture_cache.GuardRenderTargets(true); +    View color_surface{}; +    if (using_color_fb) { +        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); +    } +    View depth_surface{}; +    if (using_depth_fb || using_stencil_fb) { +        depth_surface = texture_cache.GetDepthBufferSurface(false); +    } +    texture_cache.GuardRenderTargets(false); + +    current_state.draw.draw_framebuffer = clear_framebuffer.handle; +    current_state.ApplyFramebufferState(); + +    if (color_surface) { +        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); +    } else { +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); +    } + +    if (depth_surface) { +        const auto& params = depth_surface->GetSurfaceParams(); +        switch (params.type) { +        case VideoCore::Surface::SurfaceType::Depth: { +            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); +            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +            break; +        } +        case VideoCore::Surface::SurfaceType::DepthStencil: { +            
depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); +            break; +        } +        default: { UNIMPLEMENTED(); } +        } +    } else { +        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, +                               0); +    } +} +  void RasterizerOpenGL::Clear() {      const auto& regs = system.GPU().Maxwell3D().regs;      bool use_color{};      bool use_depth{};      bool use_stencil{}; -    OpenGLState clear_state; +    OpenGLState prev_state{OpenGLState::GetCurState()}; +    SCOPE_EXIT({ +        prev_state.AllDirty(); +        prev_state.Apply(); +    }); + +    OpenGLState clear_state{OpenGLState::GetCurState()}; +    clear_state.SetDefaultViewports();      if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||          regs.clear_buffers.A) {          use_color = true; @@ -545,6 +625,7 @@ void RasterizerOpenGL::Clear() {          // true.          clear_state.depth.test_enabled = true;          clear_state.depth.test_func = GL_ALWAYS; +        clear_state.depth.write_mask = GL_TRUE;      }      if (regs.clear_buffers.S) {          ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); @@ -581,8 +662,9 @@ void RasterizerOpenGL::Clear() {          return;      } -    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( -        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); +    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); + +    SyncViewport(clear_state);      if (regs.clear_flags.scissor) {          SyncScissorTest(clear_state);      } @@ -591,21 +673,18 @@ void RasterizerOpenGL::Clear() {          clear_state.EmulateViewportWithScissor();      } -    clear_state.ApplyColorMask(); -    clear_state.ApplyDepth(); -    clear_state.ApplyStencilTest(); -    clear_state.ApplyViewport(); -    clear_state.ApplyFramebufferState(); +    
clear_state.AllDirty(); +    clear_state.Apply();      if (use_color) { -        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); +        glClearBufferfv(GL_COLOR, 0, regs.clear_color);      } -    if (clear_depth && clear_stencil) { +    if (use_depth && use_stencil) {          glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); -    } else if (clear_depth) { +    } else if (use_depth) {          glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); -    } else if (clear_stencil) { +    } else if (use_stencil) {          glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);      }  } @@ -661,6 +740,7 @@ void RasterizerOpenGL::DrawArrays() {      // Upload vertex and index data.      SetupVertexBuffer(vao); +    SetupVertexInstances(vao);      const GLintptr index_buffer_offset = SetupIndexBuffer();      // Setup draw parameters. It will automatically choose what glDraw* method to use. @@ -687,7 +767,7 @@ void RasterizerOpenGL::DrawArrays() {      if (invalidate) {          // As all cached buffers are invalidated, we need to recheck their state. 
-        gpu.dirty_flags.vertex_array.set(); +        gpu.dirty.ResetVertexArrays();      }      shader_program_manager->ApplyTo(state); @@ -700,6 +780,7 @@ void RasterizerOpenGL::DrawArrays() {      params.DispatchDraw();      accelerate_draw = AccelDraw::Disabled; +    gpu.dirty.memory_general = false;  }  void RasterizerOpenGL::FlushAll() {} @@ -907,10 +988,11 @@ void RasterizerOpenGL::SyncClipCoef() {  }  void RasterizerOpenGL::SyncCullMode() { -    const auto& regs = system.GPU().Maxwell3D().regs; +    auto& maxwell3d = system.GPU().Maxwell3D(); -    state.cull.enabled = regs.cull.enabled != 0; +    const auto& regs = maxwell3d.regs; +    state.cull.enabled = regs.cull.enabled != 0;      if (state.cull.enabled) {          state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);          state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); @@ -943,16 +1025,21 @@ void RasterizerOpenGL::SyncDepthTestState() {      state.depth.test_enabled = regs.depth_test_enable != 0;      state.depth.write_mask = regs.depth_write_enabled ? 
GL_TRUE : GL_FALSE; -    if (!state.depth.test_enabled) +    if (!state.depth.test_enabled) {          return; +    }      state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);  }  void RasterizerOpenGL::SyncStencilTestState() { -    const auto& regs = system.GPU().Maxwell3D().regs; -    state.stencil.test_enabled = regs.stencil_enable != 0; +    auto& maxwell3d = system.GPU().Maxwell3D(); +    if (!maxwell3d.dirty.stencil_test) { +        return; +    } +    const auto& regs = maxwell3d.regs; +    state.stencil.test_enabled = regs.stencil_enable != 0;      if (!regs.stencil_enable) {          return;      } @@ -981,10 +1068,17 @@ void RasterizerOpenGL::SyncStencilTestState() {          state.stencil.back.action_depth_fail = GL_KEEP;          state.stencil.back.action_depth_pass = GL_KEEP;      } +    state.MarkDirtyStencilState(); +    maxwell3d.dirty.stencil_test = false;  }  void RasterizerOpenGL::SyncColorMask() { -    const auto& regs = system.GPU().Maxwell3D().regs; +    auto& maxwell3d = system.GPU().Maxwell3D(); +    if (!maxwell3d.dirty.color_mask) { +        return; +    } +    const auto& regs = maxwell3d.regs; +      const std::size_t count =          regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;      for (std::size_t i = 0; i < count; i++) { @@ -995,6 +1089,9 @@ void RasterizerOpenGL::SyncColorMask() {          dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;          dest.alpha_enabled = (source.A == 0) ? 
GL_FALSE : GL_TRUE;      } + +    state.MarkDirtyColorMask(); +    maxwell3d.dirty.color_mask = false;  }  void RasterizerOpenGL::SyncMultiSampleState() { @@ -1009,7 +1106,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {  }  void RasterizerOpenGL::SyncBlendState() { -    const auto& regs = system.GPU().Maxwell3D().regs; +    auto& maxwell3d = system.GPU().Maxwell3D(); +    if (!maxwell3d.dirty.blend_state) { +        return; +    } +    const auto& regs = maxwell3d.regs;      state.blend_color.red = regs.blend_color.r;      state.blend_color.green = regs.blend_color.g; @@ -1032,6 +1133,8 @@ void RasterizerOpenGL::SyncBlendState() {          for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {              state.blend[i].enabled = false;          } +        maxwell3d.dirty.blend_state = false; +        state.MarkDirtyBlendState();          return;      } @@ -1048,6 +1151,9 @@ void RasterizerOpenGL::SyncBlendState() {          blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);          blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);      } + +    state.MarkDirtyBlendState(); +    maxwell3d.dirty.blend_state = false;  }  void RasterizerOpenGL::SyncLogicOpState() { @@ -1099,13 +1205,21 @@ void RasterizerOpenGL::SyncPointState() {  }  void RasterizerOpenGL::SyncPolygonOffset() { -    const auto& regs = system.GPU().Maxwell3D().regs; +    auto& maxwell3d = system.GPU().Maxwell3D(); +    if (!maxwell3d.dirty.polygon_offset) { +        return; +    } +    const auto& regs = maxwell3d.regs; +      state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;      state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;      state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;      state.polygon_offset.units = regs.polygon_offset_units;      state.polygon_offset.factor = regs.polygon_offset_factor;      state.polygon_offset.clamp = 
regs.polygon_offset_clamp; + +    state.MarkDirtyPolygonOffset(); +    maxwell3d.dirty.polygon_offset = false;  }  void RasterizerOpenGL::SyncAlphaTest() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 40b571d58..ef34d3f54 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -108,6 +108,9 @@ private:          OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,          bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); +    void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, +                                   bool using_depth_fb, bool using_stencil_fb); +      /// Configures the current constbuffers to use for the draw command.      void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,                                 const Shader& shader); @@ -216,6 +219,7 @@ private:      GLuint SetupVertexFormat();      void SetupVertexBuffer(GLuint vao); +    void SetupVertexInstances(GLuint vao);      GLintptr SetupIndexBuffer(); @@ -226,6 +230,8 @@ private:      enum class AccelDraw { Disabled, Arrays, Indexed };      AccelDraw accelerate_draw = AccelDraw::Disabled; +    OGLFramebuffer clear_framebuffer; +      using CachedPageMap = boost::icl::interval_map<u64, int>;      CachedPageMap cached_pages;  }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 32dd9eae7..456ba0403 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -572,7 +572,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia  }  Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { -    if (!system.GPU().Maxwell3D().dirty_flags.shaders) { +    if 
(!system.GPU().Maxwell3D().dirty.shaders) {          return last_shaders[static_cast<std::size_t>(program)];      } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 0eae98afe..f4777d0b0 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -165,6 +165,25 @@ OpenGLState::OpenGLState() {      alpha_test.ref = 0.0f;  } +void OpenGLState::SetDefaultViewports() { +    for (auto& item : viewports) { +        item.x = 0; +        item.y = 0; +        item.width = 0; +        item.height = 0; +        item.depth_range_near = 0.0f; +        item.depth_range_far = 1.0f; +        item.scissor.enabled = false; +        item.scissor.x = 0; +        item.scissor.y = 0; +        item.scissor.width = 0; +        item.scissor.height = 0; +    } + +    depth_clamp.far_plane = false; +    depth_clamp.near_plane = false; +} +  void OpenGLState::ApplyDefaultState() {      glEnable(GL_BLEND);      glDisable(GL_FRAMEBUFFER_SRGB); @@ -526,7 +545,7 @@ void OpenGLState::ApplySamplers() const {      }  } -void OpenGLState::Apply() const { +void OpenGLState::Apply() {      MICROPROFILE_SCOPE(OpenGL_State);      ApplyFramebufferState();      ApplyVertexArrayState(); @@ -536,19 +555,31 @@ void OpenGLState::Apply() const {      ApplyPointSize();      ApplyFragmentColorClamp();      ApplyMultisample(); +    if (dirty.color_mask) { +        ApplyColorMask(); +        dirty.color_mask = false; +    }      ApplyDepthClamp(); -    ApplyColorMask();      ApplyViewport(); -    ApplyStencilTest(); +    if (dirty.stencil_state) { +        ApplyStencilTest(); +        dirty.stencil_state = false; +    }      ApplySRgb();      ApplyCulling();      ApplyDepth();      ApplyPrimitiveRestart(); -    ApplyBlending(); +    if (dirty.blend_state) { +        ApplyBlending(); +        dirty.blend_state = false; +    }      ApplyLogicOp();      ApplyTextures();      ApplySamplers(); -    
ApplyPolygonOffset(); +    if (dirty.polygon_offset) { +        ApplyPolygonOffset(); +        dirty.polygon_offset = false; +    }      ApplyAlphaTest();  } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b0140495d..fdf9a8a12 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -195,8 +195,9 @@ public:          s_rgb_used = false;      } +    void SetDefaultViewports();      /// Apply this state as the current OpenGL state -    void Apply() const; +    void Apply();      void ApplyFramebufferState() const;      void ApplyVertexArrayState() const; @@ -237,11 +238,41 @@ public:      /// Viewport does not affects glClearBuffer so emulate viewport using scissor test      void EmulateViewportWithScissor(); +    void MarkDirtyBlendState() { +        dirty.blend_state = true; +    } + +    void MarkDirtyStencilState() { +        dirty.stencil_state = true; +    } + +    void MarkDirtyPolygonOffset() { +        dirty.polygon_offset = true; +    } + +    void MarkDirtyColorMask() { +        dirty.color_mask = true; +    } + +    void AllDirty() { +        dirty.blend_state = true; +        dirty.stencil_state = true; +        dirty.polygon_offset = true; +        dirty.color_mask = true; +    } +  private:      static OpenGLState cur_state;      // Workaround for sRGB problems caused by QT not supporting srgb output      static bool s_rgb_used; +    struct { +        bool blend_state; +        bool stencil_state; +        bool viewport_state; +        bool polygon_offset; +        bool color_mask; +    } dirty{};  };  } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b1f6bc7c2..8fcd39a69 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& 
src_view, View& dst_view,      const auto& dst_params{dst_view->GetSurfaceParams()};      OpenGLState prev_state{OpenGLState::GetCurState()}; -    SCOPE_EXIT({ prev_state.Apply(); }); +    SCOPE_EXIT({ +        prev_state.AllDirty(); +        prev_state.Apply(); +    });      OpenGLState state;      state.draw.read_framebuffer = src_framebuffer.handle;      state.draw.draw_framebuffer = dst_framebuffer.handle; +    state.AllDirty();      state.Apply();      u32 buffers{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9ecdddb0d..a05cef3b9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(      // Maintain the rasterizer's state as a priority      OpenGLState prev_state = OpenGLState::GetCurState(); +    state.AllDirty();      state.Apply();      if (framebuffer) { @@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(      system.GetPerfStats().BeginSystemFrame();      // Restore the rasterizer state +    prev_state.AllDirty();      prev_state.Apply();  } @@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {      // Link shaders and get variable locations      shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);      state.draw.shader_program = shader.handle; +    state.AllDirty();      state.Apply();      uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");      uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); @@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,      // Workaround brigthness problems in SMO by enabling sRGB in the final output      // if it has been used in the frame. 
Needed because of this bug in QT: QTBUG-50987      state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); +    state.AllDirty();      state.Apply();      glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());      glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);      // Restore default state      state.framebuffer_srgb.enabled = false;      state.texture_units[0].texture = 0; +    state.AllDirty();      state.Apply();      // Clear sRGB state for the next frame      OpenGLState::ClearsRGBUsed(); @@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {      GLuint old_read_fb = state.draw.read_framebuffer;      GLuint old_draw_fb = state.draw.draw_framebuffer;      state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; +    state.AllDirty();      state.Apply();      Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; @@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {      screenshot_framebuffer.Release();      state.draw.read_framebuffer = old_read_fb;      state.draw.draw_framebuffer = old_draw_fb; +    state.AllDirty();      state.Apply();      glDeleteRenderbuffers(1, &renderbuffer); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7f9623c62..a3a3770a7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -116,10 +116,10 @@ public:          std::lock_guard lock{mutex};          auto& maxwell3d = system.GPU().Maxwell3D(); -        if (!maxwell3d.dirty_flags.zeta_buffer) { +        if (!maxwell3d.dirty.depth_buffer) {              return depth_buffer.view;          } -        maxwell3d.dirty_flags.zeta_buffer = false; +        maxwell3d.dirty.depth_buffer = false;          const auto& regs{maxwell3d.regs};          const auto gpu_addr{regs.zeta.Address()}; @@ -145,10 +145,10 @@ public:          std::lock_guard lock{mutex};          
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);          auto& maxwell3d = system.GPU().Maxwell3D(); -        if (!maxwell3d.dirty_flags.color_buffer[index]) { +        if (!maxwell3d.dirty.render_target[index]) {              return render_targets[index].view;          } -        maxwell3d.dirty_flags.color_buffer.reset(index); +        maxwell3d.dirty.render_target[index] = false;          const auto& regs{maxwell3d.regs};          if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || @@ -274,10 +274,11 @@ protected:          auto& maxwell3d = system.GPU().Maxwell3D();          const u32 index = surface->GetRenderTarget();          if (index == DEPTH_RT) { -            maxwell3d.dirty_flags.zeta_buffer = true; +            maxwell3d.dirty.depth_buffer = true;          } else { -            maxwell3d.dirty_flags.color_buffer.set(index, true); +            maxwell3d.dirty.render_target[index] = true;          } +        maxwell3d.dirty.render_settings = true;      }      void Register(TSurface surface) {  | 
