diff options
Diffstat (limited to 'src/video_core')
21 files changed, 2196 insertions, 44 deletions
| diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4c1e6449a..9866078d4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,7 +1,11 @@  set(SRCS              renderer_opengl/generated/gl_3_2_core.c -            renderer_opengl/renderer_opengl.cpp +            renderer_opengl/gl_rasterizer.cpp +            renderer_opengl/gl_rasterizer_cache.cpp +            renderer_opengl/gl_resource_manager.cpp              renderer_opengl/gl_shader_util.cpp +            renderer_opengl/gl_state.cpp +            renderer_opengl/renderer_opengl.cpp              debug_utils/debug_utils.cpp              clipper.cpp              command_processor.cpp @@ -15,13 +19,19 @@ set(SRCS  set(HEADERS              debug_utils/debug_utils.h              renderer_opengl/generated/gl_3_2_core.h +            renderer_opengl/gl_rasterizer.h +            renderer_opengl/gl_rasterizer_cache.h +            renderer_opengl/gl_resource_manager.h              renderer_opengl/gl_shader_util.h              renderer_opengl/gl_shaders.h +            renderer_opengl/gl_state.h +            renderer_opengl/pica_to_gl.h              renderer_opengl/renderer_opengl.h              clipper.h              color.h              command_processor.h              gpu_debugger.h +            hwrasterizer_base.h              math.h              pica.h              primitive_assembly.h diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 6121df8e3..5c4c04408 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -12,8 +12,10 @@  #include "pica.h"  #include "primitive_assembly.h"  #include "vertex_shader.h" +#include "video_core.h"  #include "core/hle/service/gsp_gpu.h"  #include "core/hw/gpu.h" +#include "core/settings.h"  #include "debug_utils/debug_utils.h" @@ -107,7 +109,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {              bool index_u16 = index_info.format != 0;              DebugUtils::GeometryDumper geometry_dumper; -            PrimitiveAssembler<VertexShader::OutputVertex> clipper_primitive_assembler(registers.triangle_topology.Value()); +            PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(registers.triangle_topology.Value());              PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value());              for (unsigned int index = 0; index < registers.num_vertices; ++index) @@ -185,9 +187,25 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {                      // TODO: Add processed vertex to vertex cache!                  } -                // Send to triangle clipper -                clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); +                if (Settings::values.use_hw_renderer) { +                    // Send to hardware renderer +                    static auto AddHWTriangle = [](const Pica::VertexShader::OutputVertex& v0, +                                                   const Pica::VertexShader::OutputVertex& v1, +                                                   const Pica::VertexShader::OutputVertex& v2) { +                        VideoCore::g_renderer->hw_rasterizer->AddTriangle(v0, v1, v2); +                    }; +                     +                    primitive_assembler.SubmitVertex(output, AddHWTriangle); +                } else { +                    // Send to triangle clipper +                    primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); +                }              } + +            if (Settings::values.use_hw_renderer) { +                VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); +            } +              geometry_dumper.Dump();              if (g_debug_context) @@ -340,6 +358,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {              break;      } +    VideoCore::g_renderer->hw_rasterizer->NotifyPicaRegisterChanged(id); +      if (g_debug_context)          g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id));  } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 883df48a5..9da44ccd6 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -24,6 +24,7 @@  #include "video_core/math.h"  #include "video_core/pica.h"  #include "video_core/utils.h" +#include "video_core/video_core.h"  #include "debug_utils.h" @@ -40,6 +41,9 @@ void DebugContext::OnEvent(Event event, void* data) {      {          std::unique_lock<std::mutex> lock(breakpoint_mutex); +        // Commit the hardware renderer's framebuffer so it will show on debug widgets +        VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); +          // TODO: Should stop the CPU thread here once we multithread emulation.          active_breakpoint = event; diff --git a/src/video_core/hwrasterizer_base.h b/src/video_core/hwrasterizer_base.h new file mode 100644 index 000000000..dec193f8b --- /dev/null +++ b/src/video_core/hwrasterizer_base.h @@ -0,0 +1,40 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/emu_window.h" +#include "video_core/vertex_shader.h" + +class HWRasterizer { +public: +    virtual ~HWRasterizer() { +    } + +    /// Initialize API-specific GPU objects +    virtual void InitObjects() = 0; + +    /// Reset the rasterizer, such as flushing all caches and updating all state +    virtual void Reset() = 0; + +    /// Queues the primitive formed by the given vertices for rendering +    virtual void AddTriangle(const Pica::VertexShader::OutputVertex& v0, +                             const Pica::VertexShader::OutputVertex& v1, +                             const Pica::VertexShader::OutputVertex& v2) = 0; + +    /// Draw the current batch of triangles +    virtual void DrawTriangles() = 0; + +    /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer +    virtual void CommitFramebuffer() = 0; + +    /// Notify rasterizer that the specified PICA register has been changed +    virtual void NotifyPicaRegisterChanged(u32 id) = 0; + +    /// Notify rasterizer that the specified 3DS memory region will be read from after this notification +    virtual void NotifyPreRead(PAddr addr, u32 size) = 0; + +    /// Notify rasterizer that a 3DS memory region has been changed +    virtual void NotifyFlush(PAddr addr, u32 size) = 0; +}; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e9bc7fb3b..503c09eca 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -490,20 +490,37 @@ struct Regs {          }      } -    struct { -        // Components are laid out in reverse byte order, most significant bits first. -        enum ColorFormat : u32 { -            RGBA8    = 0, -            RGB8     = 1, -            RGB5A1   = 2, -            RGB565   = 3, -            RGBA4    = 4, -        }; +    // Components are laid out in reverse byte order, most significant bits first. +    enum ColorFormat : u32 { +        RGBA8    = 0, +        RGB8     = 1, +        RGB5A1   = 2, +        RGB565   = 3, +        RGBA4    = 4, +    }; +    // Returns the number of bytes in the specified color format +    static unsigned BytesPerColorPixel(ColorFormat format) { +        switch (format) { +        case ColorFormat::RGBA8: +            return 4; +        case ColorFormat::RGB8: +            return 3; +        case ColorFormat::RGB5A1: +        case ColorFormat::RGB565: +        case ColorFormat::RGBA4: +            return 2; +        default: +            LOG_CRITICAL(HW_GPU, "Unknown color format %u", format); +            UNIMPLEMENTED(); +        } +    } + +    struct {          INSERT_PADDING_WORDS(0x6);          DepthFormat depth_format; -        BitField<16, 3, u32> color_format; +        BitField<16, 3, ColorFormat> color_format;          INSERT_PADDING_WORDS(0x4); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 59eff48f9..362efe52e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -36,23 +36,23 @@ static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {      u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;      switch (registers.framebuffer.color_format) { -    case registers.framebuffer.RGBA8: +    case Pica::Regs::ColorFormat::RGBA8:          Color::EncodeRGBA8(color, dst_pixel);          break; -    case registers.framebuffer.RGB8: +    case Pica::Regs::ColorFormat::RGB8:          Color::EncodeRGB8(color, dst_pixel);          break; -    case registers.framebuffer.RGB5A1: +    case Pica::Regs::ColorFormat::RGB5A1:          Color::EncodeRGB5A1(color, dst_pixel);          break; -    case registers.framebuffer.RGB565: +    case Pica::Regs::ColorFormat::RGB565:          Color::EncodeRGB565(color, dst_pixel);          break; -    case registers.framebuffer.RGBA4: +    case Pica::Regs::ColorFormat::RGBA4:          Color::EncodeRGBA4(color, dst_pixel);          break; @@ -73,19 +73,19 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {      u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;      switch (registers.framebuffer.color_format) { -    case registers.framebuffer.RGBA8: +    case Pica::Regs::ColorFormat::RGBA8:          return Color::DecodeRGBA8(src_pixel); -    case registers.framebuffer.RGB8: +    case Pica::Regs::ColorFormat::RGB8:          return Color::DecodeRGB8(src_pixel); -    case registers.framebuffer.RGB5A1: +    case Pica::Regs::ColorFormat::RGB5A1:          return Color::DecodeRGB5A1(src_pixel); -    case registers.framebuffer.RGB565: +    case Pica::Regs::ColorFormat::RGB565:          return Color::DecodeRGB565(src_pixel); -    case registers.framebuffer.RGBA4: +    case Pica::Regs::ColorFormat::RGBA4:          return Color::DecodeRGBA4(src_pixel);      default: diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index b62409538..5757ac75d 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -6,6 +6,8 @@  #include "common/common_types.h" +#include "video_core/hwrasterizer_base.h" +  class RendererBase : NonCopyable {  public: @@ -48,6 +50,8 @@ public:          return m_current_frame;      } +    std::unique_ptr<HWRasterizer> hw_rasterizer; +  protected:      f32 m_current_fps;              ///< Current framerate, should be set by the renderer      int m_current_frame;            ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp new file mode 100644 index 000000000..e44375547 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -0,0 +1,879 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/settings.h" +#include "core/hw/gpu.h" + +#include "video_core/color.h" +#include "video_core/pica.h" +#include "video_core/utils.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shaders.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/pica_to_gl.h" + +#include "generated/gl_3_2_core.h" + +#include <memory> + +static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { +    return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && +            stage.alpha_op == Pica::Regs::TevStageConfig::Operation::Replace && +            stage.color_source1 == Pica::Regs::TevStageConfig::Source::Previous && +            stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous && +            stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor && +            stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha && +            stage.GetColorMultiplier() == 1 && +            stage.GetAlphaMultiplier() == 1); +} + +RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } +RasterizerOpenGL::~RasterizerOpenGL() { } + +void RasterizerOpenGL::InitObjects() { +    // Create the hardware shader program and get attrib/uniform locations +    shader.Create(GLShaders::g_vertex_shader_hw, GLShaders::g_fragment_shader_hw); +    attrib_position = glGetAttribLocation(shader.handle, "vert_position"); +    attrib_color = glGetAttribLocation(shader.handle, "vert_color"); +    attrib_texcoords = glGetAttribLocation(shader.handle, "vert_texcoords"); + +    uniform_alphatest_enabled = glGetUniformLocation(shader.handle, "alphatest_enabled"); +    uniform_alphatest_func = glGetUniformLocation(shader.handle, "alphatest_func"); +    uniform_alphatest_ref = glGetUniformLocation(shader.handle, "alphatest_ref"); + +    uniform_tex = glGetUniformLocation(shader.handle, "tex"); + +    uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); + +    const auto tev_stages = Pica::registers.GetTevStages(); +    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { +        auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; + +        std::string tev_ref_str = "tev_cfgs[" + std::to_string(tev_stage_index) + "]"; +        uniform_tev_cfg.enabled = glGetUniformLocation(shader.handle, (tev_ref_str + ".enabled").c_str()); +        uniform_tev_cfg.color_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_sources").c_str()); +        uniform_tev_cfg.alpha_sources = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_sources").c_str()); +        uniform_tev_cfg.color_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_modifiers").c_str()); +        uniform_tev_cfg.alpha_modifiers = glGetUniformLocation(shader.handle, (tev_ref_str + ".alpha_modifiers").c_str()); +        uniform_tev_cfg.color_alpha_op = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_op").c_str()); +        uniform_tev_cfg.color_alpha_multiplier = glGetUniformLocation(shader.handle, (tev_ref_str + ".color_alpha_multiplier").c_str()); +        uniform_tev_cfg.const_color = glGetUniformLocation(shader.handle, (tev_ref_str + ".const_color").c_str()); +        uniform_tev_cfg.updates_combiner_buffer_color_alpha = glGetUniformLocation(shader.handle, (tev_ref_str + ".updates_combiner_buffer_color_alpha").c_str()); +    } + +    // Generate VBO and VAO +    vertex_buffer.Create(); +    vertex_array.Create(); + +    // Update OpenGL state +    state.draw.vertex_array = vertex_array.handle; +    state.draw.vertex_buffer = vertex_buffer.handle; +    state.draw.shader_program = shader.handle; + +    state.Apply(); + +    // Set the texture samplers to correspond to different texture units +    glUniform1i(uniform_tex, 0); +    glUniform1i(uniform_tex + 1, 1); +    glUniform1i(uniform_tex + 2, 2); + +    // Set vertex attributes +    glVertexAttribPointer(attrib_position, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); +    glVertexAttribPointer(attrib_color, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, color)); +    glVertexAttribPointer(attrib_texcoords, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0)); +    glVertexAttribPointer(attrib_texcoords + 1, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord1)); +    glVertexAttribPointer(attrib_texcoords + 2, 2, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord2)); +    glEnableVertexAttribArray(attrib_position); +    glEnableVertexAttribArray(attrib_color); +    glEnableVertexAttribArray(attrib_texcoords); +    glEnableVertexAttribArray(attrib_texcoords + 1); +    glEnableVertexAttribArray(attrib_texcoords + 2); + +    // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation +    fb_color_texture.texture.Create(); +    ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + +    fb_depth_texture.texture.Create(); +    ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); + +    // Configure OpenGL framebuffer +    framebuffer.Create(); + +    state.draw.framebuffer = framebuffer.handle; + +    // Unbind texture to allow binding to framebuffer +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = 0; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0); +    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); +    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + +    ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, +               "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); +} + +void RasterizerOpenGL::Reset() { +    SyncCullMode(); +    SyncBlendEnabled(); +    SyncBlendFuncs(); +    SyncBlendColor(); +    SyncAlphaTest(); +    SyncStencilTest(); +    SyncDepthTest(); + +    // TEV stage 0 +    SyncTevSources(0, Pica::registers.tev_stage0); +    SyncTevModifiers(0, Pica::registers.tev_stage0); +    SyncTevOps(0, Pica::registers.tev_stage0); +    SyncTevColor(0, Pica::registers.tev_stage0); +    SyncTevMultipliers(0, Pica::registers.tev_stage0); + +    // TEV stage 1 +    SyncTevSources(1, Pica::registers.tev_stage1); +    SyncTevModifiers(1, Pica::registers.tev_stage1); +    SyncTevOps(1, Pica::registers.tev_stage1); +    SyncTevColor(1, Pica::registers.tev_stage1); +    SyncTevMultipliers(1, Pica::registers.tev_stage1); + +    // TEV stage 2 +    SyncTevSources(2, Pica::registers.tev_stage2); +    SyncTevModifiers(2, Pica::registers.tev_stage2); +    SyncTevOps(2, Pica::registers.tev_stage2); +    SyncTevColor(2, Pica::registers.tev_stage2); +    SyncTevMultipliers(2, Pica::registers.tev_stage2); + +    // TEV stage 3 +    SyncTevSources(3, Pica::registers.tev_stage3); +    SyncTevModifiers(3, Pica::registers.tev_stage3); +    SyncTevOps(3, Pica::registers.tev_stage3); +    SyncTevColor(3, Pica::registers.tev_stage3); +    SyncTevMultipliers(3, Pica::registers.tev_stage3); + +    // TEV stage 4 +    SyncTevSources(4, Pica::registers.tev_stage4); +    SyncTevModifiers(4, Pica::registers.tev_stage4); +    SyncTevOps(4, Pica::registers.tev_stage4); +    SyncTevColor(4, Pica::registers.tev_stage4); +    SyncTevMultipliers(4, Pica::registers.tev_stage4); + +    // TEV stage 5 +    SyncTevSources(5, Pica::registers.tev_stage5); +    SyncTevModifiers(5, Pica::registers.tev_stage5); +    SyncTevOps(5, Pica::registers.tev_stage5); +    SyncTevColor(5, Pica::registers.tev_stage5); +    SyncTevMultipliers(5, Pica::registers.tev_stage5); + +    SyncCombinerColor(); +    SyncCombinerWriteFlags(); + +    res_cache.FullFlush(); +} + +void RasterizerOpenGL::AddTriangle(const Pica::VertexShader::OutputVertex& v0, +                                   const Pica::VertexShader::OutputVertex& v1, +                                   const Pica::VertexShader::OutputVertex& v2) { +    vertex_batch.push_back(HardwareVertex(v0)); +    vertex_batch.push_back(HardwareVertex(v1)); +    vertex_batch.push_back(HardwareVertex(v2)); +} + +void RasterizerOpenGL::DrawTriangles() { +    SyncFramebuffer(); +    SyncDrawState(); + +    glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); +    glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); + +    vertex_batch.clear(); + +    // TODO: Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture +} + +void RasterizerOpenGL::CommitFramebuffer() { +    CommitColorBuffer(); +    CommitDepthBuffer(); +} + +void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { +    if (!Settings::values.use_hw_renderer) +        return; + +    switch(id) { +    // Culling +    case PICA_REG_INDEX(cull_mode): +        SyncCullMode(); +        break; + +    // Blending +    case PICA_REG_INDEX(output_merger.alphablend_enable): +        SyncBlendEnabled(); +        break; +    case PICA_REG_INDEX(output_merger.alpha_blending): +        SyncBlendFuncs(); +        break; +    case PICA_REG_INDEX(output_merger.blend_const): +        SyncBlendColor(); +        break; + +    // Alpha test +    case PICA_REG_INDEX(output_merger.alpha_test): +        SyncAlphaTest(); +        break; + +    // Stencil test +    case PICA_REG_INDEX(output_merger.stencil_test): +        SyncStencilTest(); +        break; + +    // Depth test +    case PICA_REG_INDEX(output_merger.depth_test_enable): +        SyncDepthTest(); +        break; + +    // TEV stage 0 +    case PICA_REG_INDEX(tev_stage0.color_source1): +        SyncTevSources(0, Pica::registers.tev_stage0); +        break; +    case PICA_REG_INDEX(tev_stage0.color_modifier1): +        SyncTevModifiers(0, Pica::registers.tev_stage0); +        break; +    case PICA_REG_INDEX(tev_stage0.color_op): +        SyncTevOps(0, Pica::registers.tev_stage0); +        break; +    case PICA_REG_INDEX(tev_stage0.const_r): +        SyncTevColor(0, Pica::registers.tev_stage0); +        break; +    case PICA_REG_INDEX(tev_stage0.color_scale): +        SyncTevMultipliers(0, Pica::registers.tev_stage0); +        break; + +    // TEV stage 1 +    case PICA_REG_INDEX(tev_stage1.color_source1): +        SyncTevSources(1, Pica::registers.tev_stage1); +        break; +    case PICA_REG_INDEX(tev_stage1.color_modifier1): +        SyncTevModifiers(1, Pica::registers.tev_stage1); +        break; +    case PICA_REG_INDEX(tev_stage1.color_op): +        SyncTevOps(1, Pica::registers.tev_stage1); +        break; +    case PICA_REG_INDEX(tev_stage1.const_r): +        SyncTevColor(1, Pica::registers.tev_stage1); +        break; +    case PICA_REG_INDEX(tev_stage1.color_scale): +        SyncTevMultipliers(1, Pica::registers.tev_stage1); +        break; + +    // TEV stage 2 +    case PICA_REG_INDEX(tev_stage2.color_source1): +        SyncTevSources(2, Pica::registers.tev_stage2); +        break; +    case PICA_REG_INDEX(tev_stage2.color_modifier1): +        SyncTevModifiers(2, Pica::registers.tev_stage2); +        break; +    case PICA_REG_INDEX(tev_stage2.color_op): +        SyncTevOps(2, Pica::registers.tev_stage2); +        break; +    case PICA_REG_INDEX(tev_stage2.const_r): +        SyncTevColor(2, Pica::registers.tev_stage2); +        break; +    case PICA_REG_INDEX(tev_stage2.color_scale): +        SyncTevMultipliers(2, Pica::registers.tev_stage2); +        break; + +    // TEV stage 3 +    case PICA_REG_INDEX(tev_stage3.color_source1): +        SyncTevSources(3, Pica::registers.tev_stage3); +        break; +    case PICA_REG_INDEX(tev_stage3.color_modifier1): +        SyncTevModifiers(3, Pica::registers.tev_stage3); +        break; +    case PICA_REG_INDEX(tev_stage3.color_op): +        SyncTevOps(3, Pica::registers.tev_stage3); +        break; +    case PICA_REG_INDEX(tev_stage3.const_r): +        SyncTevColor(3, Pica::registers.tev_stage3); +        break; +    case PICA_REG_INDEX(tev_stage3.color_scale): +        SyncTevMultipliers(3, Pica::registers.tev_stage3); +        break; + +    // TEV stage 4 +    case PICA_REG_INDEX(tev_stage4.color_source1): +        SyncTevSources(4, Pica::registers.tev_stage4); +        break; +    case PICA_REG_INDEX(tev_stage4.color_modifier1): +        SyncTevModifiers(4, Pica::registers.tev_stage4); +        break; +    case PICA_REG_INDEX(tev_stage4.color_op): +        SyncTevOps(4, Pica::registers.tev_stage4); +        break; +    case PICA_REG_INDEX(tev_stage4.const_r): +        SyncTevColor(4, Pica::registers.tev_stage4); +        break; +    case PICA_REG_INDEX(tev_stage4.color_scale): +        SyncTevMultipliers(4, Pica::registers.tev_stage4); +        break; + +    // TEV stage 5 +    case PICA_REG_INDEX(tev_stage5.color_source1): +        SyncTevSources(5, Pica::registers.tev_stage5); +        break; +    case PICA_REG_INDEX(tev_stage5.color_modifier1): +        SyncTevModifiers(5, Pica::registers.tev_stage5); +        break; +    case PICA_REG_INDEX(tev_stage5.color_op): +        SyncTevOps(5, Pica::registers.tev_stage5); +        break; +    case PICA_REG_INDEX(tev_stage5.const_r): +        SyncTevColor(5, Pica::registers.tev_stage5); +        break; +    case PICA_REG_INDEX(tev_stage5.color_scale): +        SyncTevMultipliers(5, Pica::registers.tev_stage5); +        break; +     +    // TEV combiner buffer color +    case PICA_REG_INDEX(tev_combiner_buffer_color): +        SyncCombinerColor(); +        break; + +    // TEV combiner buffer write flags +    case PICA_REG_INDEX(tev_combiner_buffer_input): +        SyncCombinerWriteFlags(); +        break; +    } +} + +void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { +    if (!Settings::values.use_hw_renderer) +        return; + +    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); +    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) +                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + +    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); +    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) +                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + +    // If source memory region overlaps 3DS framebuffers, commit them before the copy happens +    if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) +        CommitColorBuffer(); + +    if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) +        CommitDepthBuffer(); +} + +void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { +    if (!Settings::values.use_hw_renderer) +        return; + +    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); +    u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) +                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + +    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); +    u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) +                            * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + +    // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL +    if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) +        ReloadColorBuffer(); + +    if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) +        ReloadDepthBuffer(); + +    // Notify cache of flush in case the region touches a cached resource +    res_cache.NotifyFlush(addr, size); +} + +void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) { +    GLint internal_format; + +    texture.format = format; +    texture.width = width; +    texture.height = height; + +    switch (format) { +    case Pica::Regs::ColorFormat::RGBA8: +        internal_format = GL_RGBA; +        texture.gl_format = GL_RGBA; +        texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; +        break; + +    case Pica::Regs::ColorFormat::RGB8: +        // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every +        // specific OpenGL type used in this function using native-endian (that is, little-endian +        // mostly everywhere) for words or half-words. +        // TODO: check how those behave on big-endian processors. +        internal_format = GL_RGB; +        texture.gl_format = GL_BGR; +        texture.gl_type = GL_UNSIGNED_BYTE; +        break; + +    case Pica::Regs::ColorFormat::RGB5A1: +        internal_format = GL_RGBA; +        texture.gl_format = GL_RGBA; +        texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1; +        break; + +    case Pica::Regs::ColorFormat::RGB565: +        internal_format = GL_RGB; +        texture.gl_format = GL_RGB; +        texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; +        break; + +    case Pica::Regs::ColorFormat::RGBA4: +        internal_format = GL_RGBA; +        texture.gl_format = GL_RGBA; +        texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4; +        break; + +    default: +        LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format); +        UNIMPLEMENTED(); +        break; +    } + +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.texture.handle; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0); +    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, +                 texture.gl_format, texture.gl_type, nullptr); +} + +void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) { +    GLint internal_format; + +    texture.format = format; +    texture.width = width; +    texture.height = height; + +    switch (format) { +    case Pica::Regs::DepthFormat::D16: +        internal_format = GL_DEPTH_COMPONENT16; +        texture.gl_format = GL_DEPTH_COMPONENT; +        texture.gl_type = GL_UNSIGNED_SHORT; +        break; + +    case Pica::Regs::DepthFormat::D24: +        internal_format = GL_DEPTH_COMPONENT24; +        texture.gl_format = GL_DEPTH_COMPONENT; +        texture.gl_type = GL_UNSIGNED_INT_24_8; +        break; + +    case Pica::Regs::DepthFormat::D24S8: +        internal_format = GL_DEPTH24_STENCIL8; +        texture.gl_format = GL_DEPTH_STENCIL; +        texture.gl_type = GL_UNSIGNED_INT_24_8; +        break; + +    default: +        LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format); +        UNIMPLEMENTED(); +        break; +    } + +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.texture.handle; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0); +    glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, +                 texture.gl_format, texture.gl_type, nullptr); +} + +void RasterizerOpenGL::SyncFramebuffer() { +    PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); +    Pica::Regs::ColorFormat new_fb_color_format = Pica::registers.framebuffer.color_format; + +    PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); +    Pica::Regs::DepthFormat new_fb_depth_format = Pica::registers.framebuffer.depth_format; + +    bool fb_size_changed = fb_color_texture.width != Pica::registers.framebuffer.GetWidth() || +                           fb_color_texture.height != Pica::registers.framebuffer.GetHeight(); + +    bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || +                                 fb_size_changed; + +    bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || +                                 fb_size_changed; + +    bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || +                             color_fb_prop_changed; + +    bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || +                             depth_fb_prop_changed; + +    // Commit if framebuffer modified in any way +    if (color_fb_modified) +        CommitColorBuffer(); + +    if (depth_fb_modified) +        CommitDepthBuffer(); + +    // Reconfigure framebuffer textures if any property has changed +    if (color_fb_prop_changed) { +        ReconfigureColorTexture(fb_color_texture, new_fb_color_format, +                                Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); +    } + +    if (depth_fb_prop_changed) { +        ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, +                                Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); + +        // Only attach depth buffer as stencil if it supports stencil +        switch (new_fb_depth_format) { +        case Pica::Regs::DepthFormat::D16: +        case Pica::Regs::DepthFormat::D24: +            glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); +            break; + +        case Pica::Regs::DepthFormat::D24S8: +            glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); +            break; + +        default: +            LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format); +            UNIMPLEMENTED(); +            break; +        } +    } + +    // Load buffer data again if fb modified in any way +    if (color_fb_modified) { +        last_fb_color_addr = cur_fb_color_addr; + +        ReloadColorBuffer(); +    } + +    if (depth_fb_modified) { +        last_fb_depth_addr = cur_fb_depth_addr; + +        ReloadDepthBuffer(); +    } +} + +void RasterizerOpenGL::SyncCullMode() { +    switch (Pica::registers.cull_mode) { +    case Pica::Regs::CullMode::KeepAll: +        state.cull.enabled = false; +        break; + +    case Pica::Regs::CullMode::KeepClockWise: +        state.cull.enabled = true; +        state.cull.mode = GL_BACK; +        break; + +    case Pica::Regs::CullMode::KeepCounterClockWise: +        state.cull.enabled = true; +        state.cull.mode = GL_FRONT; +        break; + +    default: +        LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", Pica::registers.cull_mode.Value()); +        UNIMPLEMENTED(); +        break; +    } +} + +void RasterizerOpenGL::SyncBlendEnabled() { +    state.blend.enabled = Pica::registers.output_merger.alphablend_enable; +} + +void RasterizerOpenGL::SyncBlendFuncs() { +    state.blend.src_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_rgb); +    state.blend.dst_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_rgb); +    state.blend.src_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_a); +    state.blend.dst_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerOpenGL::SyncBlendColor() { +    auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.output_merger.blend_const.r); +    state.blend.color.red = blend_color[0]; +    state.blend.color.green = blend_color[1]; +    state.blend.color.blue = blend_color[2]; +    state.blend.color.alpha = blend_color[3]; +} + +void RasterizerOpenGL::SyncAlphaTest() { +    glUniform1i(uniform_alphatest_enabled, Pica::registers.output_merger.alpha_test.enable); +    glUniform1i(uniform_alphatest_func, Pica::registers.output_merger.alpha_test.func); +    glUniform1f(uniform_alphatest_ref, Pica::registers.output_merger.alpha_test.ref / 255.0f); +} + +void RasterizerOpenGL::SyncStencilTest() { +    // TODO: Implement stencil test, mask, and op +} + +void RasterizerOpenGL::SyncDepthTest() { +    state.depth.test_enabled = Pica::registers.output_merger.depth_test_enable; +    state.depth.test_func = PicaToGL::CompareFunc(Pica::registers.output_merger.depth_test_func); +    state.depth.write_mask = Pica::registers.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; +} + +void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { +    GLint color_srcs[3] = { (GLint)config.color_source1.Value(), +                            (GLint)config.color_source2.Value(), +                            (GLint)config.color_source3.Value() }; +    GLint alpha_srcs[3] = { (GLint)config.alpha_source1.Value(), +                            (GLint)config.alpha_source2.Value(), +                            (GLint)config.alpha_source3.Value() }; + +    glUniform3iv(uniform_tev_cfgs[stage_index].color_sources, 1, color_srcs); +    glUniform3iv(uniform_tev_cfgs[stage_index].alpha_sources, 1, alpha_srcs); +} + +void RasterizerOpenGL::SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { +    GLint color_mods[3] = { (GLint)config.color_modifier1.Value(), +                            (GLint)config.color_modifier2.Value(), +                            (GLint)config.color_modifier3.Value() }; +    GLint alpha_mods[3] = { (GLint)config.alpha_modifier1.Value(), +                            (GLint)config.alpha_modifier2.Value(), +                            (GLint)config.alpha_modifier3.Value() }; + +    glUniform3iv(uniform_tev_cfgs[stage_index].color_modifiers, 1, color_mods); +    glUniform3iv(uniform_tev_cfgs[stage_index].alpha_modifiers, 1, alpha_mods); +} + +void RasterizerOpenGL::SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { +    glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_op, (GLint)config.color_op.Value(), (GLint)config.alpha_op.Value()); +} + +void RasterizerOpenGL::SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { +    auto const_color = PicaToGL::ColorRGBA8((u8*)&config.const_r); +    glUniform4fv(uniform_tev_cfgs[stage_index].const_color, 1, const_color.data()); +} + +void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { +    glUniform2i(uniform_tev_cfgs[stage_index].color_alpha_multiplier, config.GetColorMultiplier(), config.GetAlphaMultiplier()); +} + +void RasterizerOpenGL::SyncCombinerColor() { +    auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.tev_combiner_buffer_color.r); +    glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); +} + +void RasterizerOpenGL::SyncCombinerWriteFlags() { +    const auto tev_stages = Pica::registers.GetTevStages(); +    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { +        glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, +                    Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), +                    Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); +    } +} + +void RasterizerOpenGL::SyncDrawState() { +    // Sync the viewport +    GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_x).ToFloat32() * 2; +    GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_y).ToFloat32() * 2; + +    // OpenGL uses different y coordinates, so negate corner offset and flip origin +    // TODO: Ensure viewport_corner.x should not be negated or origin flipped +    // TODO: Use floating-point viewports for accuracy if supported +    glViewport((GLsizei)static_cast<float>(Pica::registers.viewport_corner.x), +                -(GLsizei)static_cast<float>(Pica::registers.viewport_corner.y) +                    + Pica::registers.framebuffer.GetHeight() - viewport_height, +                viewport_width, viewport_height); + +    // Sync bound texture(s), upload if not cached +    const auto pica_textures = Pica::registers.GetTextures(); +    for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { +        const auto& texture = pica_textures[texture_index]; + +        if (texture.enabled) { +            state.texture_units[texture_index].enabled_2d = true; +            res_cache.LoadAndBindTexture(state, texture_index, texture); +        } else { +            state.texture_units[texture_index].enabled_2d = false; +        } +    } + +    // Skip processing TEV stages that simply pass the previous stage results through +    const auto tev_stages = Pica::registers.GetTevStages(); +    for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { +        glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); +    } + +    state.Apply(); +} + +void RasterizerOpenGL::ReloadColorBuffer() { +    u8* color_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetColorBufferPhysicalAddress()); + +    if (color_buffer == nullptr) +        return; + +    u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); + +    std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); + +    // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. +    for (int y = 0; y < fb_color_texture.height; ++y) { +        for (int x = 0; x < fb_color_texture.width; ++x) { +            const u32 coarse_y = y & ~7; +            u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; +            u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + +            u8* pixel = color_buffer + dst_offset; +            memcpy(&temp_fb_color_buffer[gl_px_idx], pixel, bytes_per_pixel); +        } +    } + +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = fb_color_texture.texture.handle; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0); +    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height, +                    fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get()); +} + +void RasterizerOpenGL::ReloadDepthBuffer() { +    // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil +    u8* depth_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetDepthBufferPhysicalAddress()); + +    if (depth_buffer == nullptr) { +        return; +    } + +    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); + +    // OpenGL needs 4 bpp alignment for D24 +    u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel; + +    std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); + +    for (int y = 0; y < fb_depth_texture.height; ++y) { +        for (int x = 0; x < fb_depth_texture.width; ++x) { +            const u32 coarse_y = y & ~7; +            u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; +            u32 gl_px_idx = x + y * fb_depth_texture.width; + +            switch (fb_depth_texture.format) { +            case Pica::Regs::DepthFormat::D16: +                ((u16*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD16(depth_buffer + dst_offset); +                break; +            case Pica::Regs::DepthFormat::D24: +                ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD24(depth_buffer + dst_offset); +                break; +            case Pica::Regs::DepthFormat::D24S8: +            { +                Math::Vec2<u32> depth_stencil = Color::DecodeD24S8(depth_buffer + dst_offset); +                ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = (depth_stencil.x << 8) | depth_stencil.y; +                break; +            } +            default: +                LOG_CRITICAL(Render_OpenGL, "Unknown memory framebuffer depth format %x", fb_depth_texture.format); +                UNIMPLEMENTED(); +                break; +            } +        } +    } + +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0); +    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, +                    fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); +} + +void RasterizerOpenGL::CommitColorBuffer() { +    if (last_fb_color_addr != 0) { +        u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); + +        if (color_buffer != nullptr) { +            u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); + +            std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); + +            state.texture_units[0].enabled_2d = true; +            state.texture_units[0].texture_2d = fb_color_texture.texture.handle; +            state.Apply(); + +            glActiveTexture(GL_TEXTURE0); +            glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get()); + +            // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. +            for (int y = 0; y < fb_color_texture.height; ++y) { +                for (int x = 0; x < fb_color_texture.width; ++x) { +                    const u32 coarse_y = y & ~7; +                    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; +                    u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + +                    u8* pixel = color_buffer + dst_offset; +                    memcpy(pixel, &temp_gl_color_buffer[gl_px_idx], bytes_per_pixel); +                } +            } +        } +    } +} + +void RasterizerOpenGL::CommitDepthBuffer() { +    if (last_fb_depth_addr != 0) { +        // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. +        u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); + +        if (depth_buffer != nullptr) { +            u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); + +            // OpenGL needs 4 bpp alignment for D24 +            u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel; + +            std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); + +            state.texture_units[0].enabled_2d = true; +            state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; +            state.Apply(); + +            glActiveTexture(GL_TEXTURE0); +            glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get()); + +            for (int y = 0; y < fb_depth_texture.height; ++y) { +                for (int x = 0; x < fb_depth_texture.width; ++x) { +                    const u32 coarse_y = y & ~7; +                    u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; +                    u32 gl_px_idx = x + y * fb_depth_texture.width; + +                    switch (fb_depth_texture.format) { +                    case Pica::Regs::DepthFormat::D16: +                        Color::EncodeD16(((u16*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); +                        break; +                    case Pica::Regs::DepthFormat::D24: +                        Color::EncodeD24(((u32*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); +                        break; +                    case Pica::Regs::DepthFormat::D24S8: +                    { +                        u32 depth_stencil = ((u32*)temp_gl_depth_buffer.get())[gl_px_idx]; +                        Color::EncodeD24S8((depth_stencil >> 8), depth_stencil & 0xFF, depth_buffer + dst_offset); +                        break; +                    } +                    default: +                        LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", fb_depth_texture.format); +                        UNIMPLEMENTED(); +                        break; +                    } +                } +            } +        } +    } +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h new file mode 100644 index 000000000..9896f8d04 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -0,0 +1,207 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/hwrasterizer_base.h" + +#include "gl_state.h" +#include "gl_rasterizer_cache.h" + +class RasterizerOpenGL : public HWRasterizer { +public: + +    RasterizerOpenGL(); +    ~RasterizerOpenGL() override; + +    /// Initialize API-specific GPU objects +    void InitObjects() override; + +    /// Reset the rasterizer, such as flushing all caches and updating all state +    void Reset() override; + +    /// Queues the primitive formed by the given vertices for rendering +    void AddTriangle(const Pica::VertexShader::OutputVertex& v0, +                     const Pica::VertexShader::OutputVertex& v1, +                     const Pica::VertexShader::OutputVertex& v2) override; + +    /// Draw the current batch of triangles +    void DrawTriangles() override; + +    /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer +    void CommitFramebuffer() override; + +    /// Notify rasterizer that the specified PICA register has been changed +    void NotifyPicaRegisterChanged(u32 id) override; + +    /// Notify rasterizer that the specified 3DS memory region will be read from after this notification +    void NotifyPreRead(PAddr addr, u32 size) override; + +    /// Notify rasterizer that a 3DS memory region has been changed +    void NotifyFlush(PAddr addr, u32 size) override; + +private: +    /// Structure used for managing texture environment states +    struct TEVConfigUniforms { +        GLuint enabled; +        GLuint color_sources; +        GLuint alpha_sources; +        GLuint color_modifiers; +        GLuint alpha_modifiers; +        GLuint color_alpha_op; +        GLuint color_alpha_multiplier; +        GLuint const_color; +        GLuint updates_combiner_buffer_color_alpha; +    }; + +    /// Structure used for storing information about color textures +    struct TextureInfo { +        OGLTexture texture; +        GLsizei width; +        GLsizei height; +        Pica::Regs::ColorFormat format; +        GLenum gl_format; +        GLenum gl_type; +    }; + +    /// Structure used for storing information about depth textures +    struct DepthTextureInfo { +        OGLTexture texture; +        GLsizei width; +        GLsizei height; +        Pica::Regs::DepthFormat format; +        GLenum gl_format; +        GLenum gl_type; +    }; + +    /// Structure that the hardware rendered vertices are composed of +    struct HardwareVertex { +        HardwareVertex(const Pica::VertexShader::OutputVertex& v) { +            position[0] = v.pos.x.ToFloat32(); +            position[1] = v.pos.y.ToFloat32(); +            position[2] = v.pos.z.ToFloat32(); +            position[3] = v.pos.w.ToFloat32(); +            color[0] = v.color.x.ToFloat32(); +            color[1] = v.color.y.ToFloat32(); +            color[2] = v.color.z.ToFloat32(); +            color[3] = v.color.w.ToFloat32(); +            tex_coord0[0] = v.tc0.x.ToFloat32(); +            tex_coord0[1] = v.tc0.y.ToFloat32(); +            tex_coord1[0] = v.tc1.x.ToFloat32(); +            tex_coord1[1] = v.tc1.y.ToFloat32(); +            tex_coord2[0] = v.tc2.x.ToFloat32(); +            tex_coord2[1] = v.tc2.y.ToFloat32(); +        } + +        GLfloat position[4]; +        GLfloat color[4]; +        GLfloat tex_coord0[2]; +        GLfloat tex_coord1[2]; +        GLfloat tex_coord2[2]; +    }; + +    /// Reconfigure the OpenGL color texture to use the given format and dimensions +    void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); + +    /// Reconfigure the OpenGL depth texture to use the given format and dimensions +    void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height); + +    /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer +    void SyncFramebuffer(); + +    /// Syncs the cull mode to match the PICA register +    void SyncCullMode(); + +    /// Syncs the blend enabled status to match the PICA register +    void SyncBlendEnabled(); + +    /// Syncs the blend functions to match the PICA register +    void SyncBlendFuncs(); + +    /// Syncs the blend color to match the PICA register +    void SyncBlendColor(); + +    /// Syncs the alpha test states to match the PICA register +    void SyncAlphaTest(); + +    /// Syncs the stencil test states to match the PICA register +    void SyncStencilTest(); + +    /// Syncs the depth test states to match the PICA register +    void SyncDepthTest(); + +    /// Syncs the specified TEV stage's color and alpha sources to match the PICA register +    void SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + +    /// Syncs the specified TEV stage's color and alpha modifiers to match the PICA register +    void SyncTevModifiers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + +    /// Syncs the specified TEV stage's color and alpha combiner operations to match the PICA register +    void SyncTevOps(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + +    /// Syncs the specified TEV stage's constant color to match the PICA register +    void SyncTevColor(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + +    /// Syncs the specified TEV stage's color and alpha multipliers to match the PICA register +    void SyncTevMultipliers(unsigned stage_index, const Pica::Regs::TevStageConfig& config); + +    /// Syncs the TEV combiner color buffer to match the PICA register +    void SyncCombinerColor(); + +    /// Syncs the TEV combiner write flags to match the PICA register +    void SyncCombinerWriteFlags(); + +    /// Syncs the remaining OpenGL drawing state to match the current PICA state +    void SyncDrawState(); + +    /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture +    void ReloadColorBuffer(); + +    /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture +    void ReloadDepthBuffer(); + +    /** +     * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory +     * Loads the OpenGL framebuffer textures into temporary buffers +     * Then copies into the 3DS framebuffer using proper Morton order +     */ +    void CommitColorBuffer(); + +    /** +     * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory +     * Loads the OpenGL framebuffer textures into temporary buffers +     * Then copies into the 3DS framebuffer using proper Morton order +     */ +    void CommitDepthBuffer(); + +    RasterizerCacheOpenGL res_cache; + +    std::vector<HardwareVertex> vertex_batch; + +    OpenGLState state; + +    PAddr last_fb_color_addr; +    PAddr last_fb_depth_addr; + +    // Hardware rasterizer +    TextureInfo fb_color_texture; +    DepthTextureInfo fb_depth_texture; +    OGLShader shader; +    OGLVertexArray vertex_array; +    OGLBuffer vertex_buffer; +    OGLFramebuffer framebuffer; + +    // Hardware vertex shader +    GLuint attrib_position; +    GLuint attrib_color; +    GLuint attrib_texcoords; + +    // Hardware fragment shader +    GLuint uniform_alphatest_enabled; +    GLuint uniform_alphatest_func; +    GLuint uniform_alphatest_ref; +    GLuint uniform_tex; +    GLuint uniform_tev_combiner_buffer_color; +    TEVConfigUniforms uniform_tev_cfgs[6]; +}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp new file mode 100644 index 000000000..6f88a8b21 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -0,0 +1,77 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/make_unique.h" +#include "common/math_util.h" + +#include "core/memory.h" + +#include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/pica_to_gl.h" +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/math.h" + +RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { +    FullFlush(); +} + +void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { +    PAddr texture_addr = config.config.GetPhysicalAddress(); + +    const auto cached_texture = texture_cache.find(texture_addr); + +    if (cached_texture != texture_cache.end()) { +        state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; +        state.Apply(); +    } else { +        std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>(); + +        new_texture->texture.Create(); +        state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; +        state.Apply(); + +        // TODO: Need to choose filters that correspond to PICA once register is declared +        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); +        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + +        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(config.config.wrap_s)); +        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(config.config.wrap_t)); + +        const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + +        new_texture->width = info.width; +        new_texture->height = info.height; +        new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format); + +        u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); +        std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); + +        for (int y = 0; y < info.height; ++y) { +            for (int x = 0; x < info.width; ++x) { +                temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); +            } +        } + +        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); + +        texture_cache.emplace(texture_addr, std::move(new_texture)); +    } +} + +void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) { +    // Flush any texture that falls in the flushed region +    // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound +    auto cache_upper_bound = texture_cache.upper_bound(addr + size); +    for (auto it = texture_cache.begin(); it != cache_upper_bound;) { +        if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) { +            it = texture_cache.erase(it); +        } else { +            ++it; +        } +    } +} + +void RasterizerCacheOpenGL::FullFlush() { +    texture_cache.clear(); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h new file mode 100644 index 000000000..96f3a925c --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -0,0 +1,36 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "gl_state.h" +#include "gl_resource_manager.h" +#include "video_core/pica.h" + +#include <memory> +#include <map> + +class RasterizerCacheOpenGL : NonCopyable { +public: +    ~RasterizerCacheOpenGL(); + +    /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) +    void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config); + +    /// Flush any cached resource that touches the flushed region +    void NotifyFlush(PAddr addr, u32 size); + +    /// Flush all cached OpenGL resources tracked by this cache manager +    void FullFlush(); + +private: +    struct CachedTexture { +        OGLTexture texture; +        GLuint width; +        GLuint height; +        u32 size; +    }; + +    std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; +}; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp new file mode 100644 index 000000000..8f4ae28a4 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -0,0 +1,111 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" + +// Textures +OGLTexture::OGLTexture() : handle(0) { +} + +OGLTexture::~OGLTexture() { +    Release(); +} + +void OGLTexture::Create() { +    if (handle != 0) { +        return; +    } + +    glGenTextures(1, &handle); +} + +void OGLTexture::Release() { +    glDeleteTextures(1, &handle); +    handle = 0; +} + +// Shaders +OGLShader::OGLShader() : handle(0) { +} + +OGLShader::~OGLShader() { +    Release(); +} + +void OGLShader::Create(const char* vert_shader, const char* frag_shader) { +    if (handle != 0) { +        return; +    } + +    handle = ShaderUtil::LoadShaders(vert_shader, frag_shader); +} + +void OGLShader::Release() { +    glDeleteProgram(handle); +    handle = 0; +} + +// Buffer objects +OGLBuffer::OGLBuffer() : handle(0) { +} + +OGLBuffer::~OGLBuffer() { +    Release(); +} + +void OGLBuffer::Create() { +    if (handle != 0) { +        return; +    } + +    glGenBuffers(1, &handle); +} + +void OGLBuffer::Release() { +    glDeleteBuffers(1, &handle); +    handle = 0; +} + +// Vertex array objects +OGLVertexArray::OGLVertexArray() : handle(0) { +} + +OGLVertexArray::~OGLVertexArray() { +    Release(); +} + +void OGLVertexArray::Create() { +    if (handle != 0) { +        return; +    } + +    glGenVertexArrays(1, &handle); +} + +void OGLVertexArray::Release() { +    glDeleteVertexArrays(1, &handle); +    handle = 0; +} + +// Framebuffers +OGLFramebuffer::OGLFramebuffer() : handle(0) { +} + +OGLFramebuffer::~OGLFramebuffer() { +    Release(); +} + +void OGLFramebuffer::Create() { +    if (handle != 0) { +        return; +    } + +    glGenFramebuffers(1, &handle); +} + +void OGLFramebuffer::Release() { +    glDeleteFramebuffers(1, &handle); +    handle = 0; +} diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h new file mode 100644 index 000000000..975720d0a --- /dev/null +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -0,0 +1,79 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +#include "generated/gl_3_2_core.h" + +class OGLTexture : public NonCopyable { +public: +    OGLTexture(); +    ~OGLTexture(); + +    /// Creates a new internal OpenGL resource and stores the handle +    void Create(); + +    /// Deletes the internal OpenGL resource +    void Release(); + +    GLuint handle; +}; + +class OGLShader : public NonCopyable { +public: +    OGLShader(); +    ~OGLShader(); + +    /// Creates a new internal OpenGL resource and stores the handle +    void Create(const char* vert_shader, const char* frag_shader); + +    /// Deletes the internal OpenGL resource +    void Release(); + +    GLuint handle; +}; + +class OGLBuffer : public NonCopyable { +public: +    OGLBuffer(); +    ~OGLBuffer(); + +    /// Creates a new internal OpenGL resource and stores the handle +    void Create(); + +    /// Deletes the internal OpenGL resource +    void Release(); + +    GLuint handle; +}; + +class OGLVertexArray : public NonCopyable { +public: +    OGLVertexArray(); +    ~OGLVertexArray(); + +    /// Creates a new internal OpenGL resource and stores the handle +    void Create(); + +    /// Deletes the internal OpenGL resource +    void Release(); + +    GLuint handle; +}; + +class OGLFramebuffer : public NonCopyable { +public: +    OGLFramebuffer(); +    ~OGLFramebuffer(); + +    /// Creates a new internal OpenGL resource and stores the handle +    void Create(); + +    /// Deletes the internal OpenGL resource +    void Release(); + +    GLuint handle; +}; diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h index 746a37afe..8f0941230 100644 --- a/src/video_core/renderer_opengl/gl_shaders.h +++ b/src/video_core/renderer_opengl/gl_shaders.h @@ -42,4 +42,292 @@ void main() {  }  )"; +const char g_vertex_shader_hw[] = R"( +#version 150 core + +#define NUM_VTX_ATTR 7 + +in vec4 vert_position; +in vec4 vert_color; +in vec2 vert_texcoords[3]; + +out vec4 o[NUM_VTX_ATTR]; + +void main() { +    o[2] = vert_color; +    o[3] = vec4(vert_texcoords[0].xy, vert_texcoords[1].xy); +    o[5] = vec4(0.0, 0.0, vert_texcoords[2].xy); + +    gl_Position = vec4(vert_position.x, -vert_position.y, -vert_position.z, vert_position.w); +} +)"; + +// TODO: Create a shader constructor and cache that builds this program with minimal conditionals instead of using tev_cfg uniforms +const char g_fragment_shader_hw[] = R"( +#version 150 core + +#define NUM_VTX_ATTR 7 +#define NUM_TEV_STAGES 6 + +#define SOURCE_PRIMARYCOLOR         0x0 +#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 +#define SOURCE_TEXTURE0             0x3 +#define SOURCE_TEXTURE1             0x4 +#define SOURCE_TEXTURE2             0x5 +#define SOURCE_TEXTURE3             0x6 +#define SOURCE_PREVIOUSBUFFER       0xd +#define SOURCE_CONSTANT             0xe +#define SOURCE_PREVIOUS             0xf + +#define COLORMODIFIER_SOURCECOLOR         0x0 +#define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1 +#define COLORMODIFIER_SOURCEALPHA         0x2 +#define COLORMODIFIER_ONEMINUSSOURCEALPHA 0x3 +#define COLORMODIFIER_SOURCERED           0x4 +#define COLORMODIFIER_ONEMINUSSOURCERED   0x5 +#define COLORMODIFIER_SOURCEGREEN         0x8 +#define COLORMODIFIER_ONEMINUSSOURCEGREEN 0x9 +#define COLORMODIFIER_SOURCEBLUE          0xc +#define COLORMODIFIER_ONEMINUSSOURCEBLUE  0xd + +#define ALPHAMODIFIER_SOURCEALPHA         0x0 +#define ALPHAMODIFIER_ONEMINUSSOURCEALPHA 0x1 +#define ALPHAMODIFIER_SOURCERED           0x2 +#define ALPHAMODIFIER_ONEMINUSSOURCERED   0x3 +#define ALPHAMODIFIER_SOURCEGREEN         0x4 +#define ALPHAMODIFIER_ONEMINUSSOURCEGREEN 0x5 +#define ALPHAMODIFIER_SOURCEBLUE          0x6 +#define ALPHAMODIFIER_ONEMINUSSOURCEBLUE  0x7 + +#define OPERATION_REPLACE         0 +#define OPERATION_MODULATE        1 +#define OPERATION_ADD             2 +#define OPERATION_ADDSIGNED       3 +#define OPERATION_LERP            4 +#define OPERATION_SUBTRACT        5 +#define OPERATION_MULTIPLYTHENADD 8 +#define OPERATION_ADDTHENMULTIPLY 9 + +#define COMPAREFUNC_NEVER              0 +#define COMPAREFUNC_ALWAYS             1 +#define COMPAREFUNC_EQUAL              2 +#define COMPAREFUNC_NOTEQUAL           3 +#define COMPAREFUNC_LESSTHAN           4 +#define COMPAREFUNC_LESSTHANOREQUAL    5 +#define COMPAREFUNC_GREATERTHAN        6 +#define COMPAREFUNC_GREATERTHANOREQUAL 7 + +in vec4 o[NUM_VTX_ATTR]; +out vec4 color; + +uniform bool alphatest_enabled; +uniform int alphatest_func; +uniform float alphatest_ref; + +uniform sampler2D tex[3]; + +uniform vec4 tev_combiner_buffer_color; + +struct TEVConfig +{ +    bool enabled; +    ivec3 color_sources; +    ivec3 alpha_sources; +    ivec3 color_modifiers; +    ivec3 alpha_modifiers; +    ivec2 color_alpha_op; +    ivec2 color_alpha_multiplier; +    vec4 const_color; +    bvec2 updates_combiner_buffer_color_alpha; +}; + +uniform TEVConfig tev_cfgs[NUM_TEV_STAGES]; + +vec4 g_combiner_buffer; +vec4 g_last_tex_env_out; +vec4 g_const_color; + +vec4 GetSource(int source) { +    if (source == SOURCE_PRIMARYCOLOR) { +        return o[2]; +    } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) { +        // HACK: Uses color value, but should really use fragment lighting output +        return o[2]; +    } else if (source == SOURCE_TEXTURE0) { +        return texture(tex[0], o[3].xy); +    } else if (source == SOURCE_TEXTURE1) { +        return texture(tex[1], o[3].zw); +    } else if (source == SOURCE_TEXTURE2) { +        // TODO: Unverified +        return texture(tex[2], o[5].zw); +    } else if (source == SOURCE_TEXTURE3) { +        // TODO: no 4th texture? +    } else if (source == SOURCE_PREVIOUSBUFFER) { +        return g_combiner_buffer; +    } else if (source == SOURCE_CONSTANT) { +        return g_const_color; +    } else if (source == SOURCE_PREVIOUS) { +        return g_last_tex_env_out; +    } + +    return vec4(0.0); +} + +vec3 GetColorModifier(int factor, vec4 color) { +    if (factor == COLORMODIFIER_SOURCECOLOR) { +        return color.rgb; +    } else if (factor == COLORMODIFIER_ONEMINUSSOURCECOLOR) { +        return vec3(1.0) - color.rgb; +    } else if (factor == COLORMODIFIER_SOURCEALPHA) { +        return color.aaa; +    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEALPHA) { +        return vec3(1.0) - color.aaa; +    } else if (factor == COLORMODIFIER_SOURCERED) { +        return color.rrr; +    } else if (factor == COLORMODIFIER_ONEMINUSSOURCERED) { +        return vec3(1.0) - color.rrr; +    } else if (factor == COLORMODIFIER_SOURCEGREEN) { +        return color.ggg; +    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEGREEN) { +        return vec3(1.0) - color.ggg; +    } else if (factor == COLORMODIFIER_SOURCEBLUE) { +        return color.bbb; +    } else if (factor == COLORMODIFIER_ONEMINUSSOURCEBLUE) { +        return vec3(1.0) - color.bbb; +    } + +    return vec3(0.0); +} + +float GetAlphaModifier(int factor, vec4 color) { +    if (factor == ALPHAMODIFIER_SOURCEALPHA) { +        return color.a; +    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEALPHA) { +        return 1.0 - color.a; +    } else if (factor == ALPHAMODIFIER_SOURCERED) { +        return color.r; +    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCERED) { +        return 1.0 - color.r; +    } else if (factor == ALPHAMODIFIER_SOURCEGREEN) { +        return color.g; +    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEGREEN) { +        return 1.0 - color.g; +    } else if (factor == ALPHAMODIFIER_SOURCEBLUE) { +        return color.b; +    } else if (factor == ALPHAMODIFIER_ONEMINUSSOURCEBLUE) { +        return 1.0 - color.b; +    } + +    return 0.0; +} + +vec3 ColorCombine(int op, vec3 color[3]) { +    if (op == OPERATION_REPLACE) { +        return color[0]; +    } else if (op == OPERATION_MODULATE) { +        return color[0] * color[1]; +    } else if (op == OPERATION_ADD) { +        return min(color[0] + color[1], 1.0); +    } else if (op == OPERATION_ADDSIGNED) { +        return clamp(color[0] + color[1] - vec3(0.5), 0.0, 1.0); +    } else if (op == OPERATION_LERP) { +        return color[0] * color[2] + color[1] * (vec3(1.0) - color[2]); +    } else if (op == OPERATION_SUBTRACT) { +        return max(color[0] - color[1], 0.0); +    } else if (op == OPERATION_MULTIPLYTHENADD) { +        return min(color[0] * color[1] + color[2], 1.0); +    } else if (op == OPERATION_ADDTHENMULTIPLY) { +        return min(color[0] + color[1], 1.0) * color[2]; +    } + +    return vec3(0.0); +} + +float AlphaCombine(int op, float alpha[3]) { +    if (op == OPERATION_REPLACE) { +        return alpha[0]; +    } else if (op == OPERATION_MODULATE) { +        return alpha[0] * alpha[1]; +    } else if (op == OPERATION_ADD) { +        return min(alpha[0] + alpha[1], 1.0); +    } else if (op == OPERATION_ADDSIGNED) { +        return clamp(alpha[0] + alpha[1] - 0.5, 0.0, 1.0); +    } else if (op == OPERATION_LERP) { +        return alpha[0] * alpha[2] + alpha[1] * (1.0 - alpha[2]); +    } else if (op == OPERATION_SUBTRACT) { +        return max(alpha[0] - alpha[1], 0.0); +    } else if (op == OPERATION_MULTIPLYTHENADD) { +        return min(alpha[0] * alpha[1] + alpha[2], 1.0); +    } else if (op == OPERATION_ADDTHENMULTIPLY) { +        return min(alpha[0] + alpha[1], 1.0) * alpha[2]; +    } + +    return 0.0; +} + +void main(void) { +    g_combiner_buffer = tev_combiner_buffer_color; + +    for (int tex_env_idx = 0; tex_env_idx < NUM_TEV_STAGES; ++tex_env_idx) { +        if (tev_cfgs[tex_env_idx].enabled) { +            g_const_color = tev_cfgs[tex_env_idx].const_color; + +            vec3 color_results[3] = vec3[3](GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.x, GetSource(tev_cfgs[tex_env_idx].color_sources.x)), +                                            GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.y, GetSource(tev_cfgs[tex_env_idx].color_sources.y)), +                                            GetColorModifier(tev_cfgs[tex_env_idx].color_modifiers.z, GetSource(tev_cfgs[tex_env_idx].color_sources.z))); +            vec3 color_output = ColorCombine(tev_cfgs[tex_env_idx].color_alpha_op.x, color_results); + +            float alpha_results[3] = float[3](GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.x, GetSource(tev_cfgs[tex_env_idx].alpha_sources.x)), +                                              GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.y, GetSource(tev_cfgs[tex_env_idx].alpha_sources.y)), +                                              GetAlphaModifier(tev_cfgs[tex_env_idx].alpha_modifiers.z, GetSource(tev_cfgs[tex_env_idx].alpha_sources.z))); +            float alpha_output = AlphaCombine(tev_cfgs[tex_env_idx].color_alpha_op.y, alpha_results); + +            g_last_tex_env_out = vec4(min(color_output * tev_cfgs[tex_env_idx].color_alpha_multiplier.x, 1.0), min(alpha_output * tev_cfgs[tex_env_idx].color_alpha_multiplier.y, 1.0)); +        } + +        if (tev_cfgs[tex_env_idx].updates_combiner_buffer_color_alpha.x) { +            g_combiner_buffer.rgb = g_last_tex_env_out.rgb; +        } + +        if (tev_cfgs[tex_env_idx].updates_combiner_buffer_color_alpha.y) { +            g_combiner_buffer.a = g_last_tex_env_out.a; +        } +    } + +    if (alphatest_enabled) { +        if (alphatest_func == COMPAREFUNC_NEVER) { +            discard; +        } else if (alphatest_func == COMPAREFUNC_ALWAYS) { + +        } else if (alphatest_func == COMPAREFUNC_EQUAL) { +            if (g_last_tex_env_out.a != alphatest_ref) { +                discard; +            } +        } else if (alphatest_func == COMPAREFUNC_NOTEQUAL) { +            if (g_last_tex_env_out.a == alphatest_ref) { +                discard; +            } +        } else if (alphatest_func == COMPAREFUNC_LESSTHAN) { +            if (g_last_tex_env_out.a >= alphatest_ref) { +                discard; +            } +        } else if (alphatest_func == COMPAREFUNC_LESSTHANOREQUAL) { +            if (g_last_tex_env_out.a > alphatest_ref) { +                discard; +            } +        } else if (alphatest_func == COMPAREFUNC_GREATERTHAN) { +            if (g_last_tex_env_out.a <= alphatest_ref) { +                discard; +            } +        } else if (alphatest_func == COMPAREFUNC_GREATERTHANOREQUAL) { +            if (g_last_tex_env_out.a < alphatest_ref) { +                discard; +            } +        } +    } + +    color = g_last_tex_env_out; +} +)"; +  } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp new file mode 100644 index 000000000..ca8a371e7 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -0,0 +1,160 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/pica.h" + +OpenGLState OpenGLState::cur_state; + +OpenGLState::OpenGLState() { +    // These all match default OpenGL values +    cull.enabled = false; +    cull.mode = GL_BACK; + +    depth.test_enabled = false; +    depth.test_func = GL_LESS; +    depth.write_mask = GL_TRUE; + +    stencil.test_enabled = false; +    stencil.test_func = GL_ALWAYS; +    stencil.test_ref = 0; +    stencil.test_mask = -1; +    stencil.write_mask = -1; + +    blend.enabled = false; +    blend.src_rgb_func = GL_ONE; +    blend.dst_rgb_func = GL_ZERO; +    blend.src_a_func = GL_ONE; +    blend.dst_a_func = GL_ZERO; +    blend.color.red = 0.0f; +    blend.color.green = 0.0f; +    blend.color.blue = 0.0f; +    blend.color.alpha = 0.0f; + +    for (auto& texture_unit : texture_units) { +        texture_unit.enabled_2d = false; +        texture_unit.texture_2d = 0; +    } + +    draw.framebuffer = 0; +    draw.vertex_array = 0; +    draw.vertex_buffer = 0; +    draw.shader_program = 0; +} + +const void OpenGLState::Apply() { +    // Culling +    if (cull.enabled != cur_state.cull.enabled) { +        if (cull.enabled) { +            glEnable(GL_CULL_FACE); +        } else { +            glDisable(GL_CULL_FACE); +        } +    } + +    if (cull.mode != cur_state.cull.mode) { +        glCullFace(cull.mode); +    } + +    // Depth test +    if (depth.test_enabled != cur_state.depth.test_enabled) { +        if (depth.test_enabled) { +            glEnable(GL_DEPTH_TEST); +        } else { +            glDisable(GL_DEPTH_TEST); +        } +    } + +    if (depth.test_func != cur_state.depth.test_func) { +        glDepthFunc(depth.test_func); +    } + +    // Depth mask +    if (depth.write_mask != cur_state.depth.write_mask) { +        glDepthMask(depth.write_mask); +    } + +    // Stencil test +    if (stencil.test_enabled != cur_state.stencil.test_enabled) { +        if (stencil.test_enabled) { +            glEnable(GL_STENCIL_TEST); +        } else { +            glDisable(GL_STENCIL_TEST); +        } +    }  + +    if (stencil.test_func != cur_state.stencil.test_func || +        stencil.test_ref != cur_state.stencil.test_ref || +        stencil.test_mask != cur_state.stencil.test_mask) { +        glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); +    } + +    // Stencil mask +    if (stencil.write_mask != cur_state.stencil.write_mask) { +        glStencilMask(stencil.write_mask); +    } + +    // Blending +    if (blend.enabled != cur_state.blend.enabled) { +        if (blend.enabled) { +            glEnable(GL_BLEND); +        } else { +            glDisable(GL_BLEND); +        } +    } + +    if (blend.color.red != cur_state.blend.color.red || +        blend.color.green != cur_state.blend.color.green || +        blend.color.blue != cur_state.blend.color.blue || +        blend.color.alpha != cur_state.blend.color.alpha) { +        glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); +    } + +    if (blend.src_rgb_func != cur_state.blend.src_rgb_func || +        blend.dst_rgb_func != cur_state.blend.dst_rgb_func || +        blend.src_a_func != cur_state.blend.src_a_func || +        blend.dst_a_func != cur_state.blend.dst_a_func) { +        glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); +    } + +    // Textures +    for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) { +        if (texture_units[texture_index].enabled_2d != texture_units[texture_index].enabled_2d) { +            glActiveTexture(GL_TEXTURE0 + texture_index); + +            if (texture_units[texture_index].enabled_2d) { +                glEnable(GL_TEXTURE_2D); +            } else { +                glDisable(GL_TEXTURE_2D); +            } +        } + +        if (texture_units[texture_index].texture_2d != cur_state.texture_units[texture_index].texture_2d) { +            glActiveTexture(GL_TEXTURE0 + texture_index); +            glBindTexture(GL_TEXTURE_2D, texture_units[texture_index].texture_2d); +        } +    } + +    // Framebuffer +    if (draw.framebuffer != cur_state.draw.framebuffer) { +        glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); +    } + +    // Vertex array +    if (draw.vertex_array != cur_state.draw.vertex_array) { +        glBindVertexArray(draw.vertex_array); +    } + +    // Vertex buffer +    if (draw.vertex_buffer != cur_state.draw.vertex_buffer) { +        glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer); +    } + +    // Shader program +    if (draw.shader_program != cur_state.draw.shader_program) { +        glUseProgram(draw.shader_program); +    } + +    cur_state = *this; +} diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h new file mode 100644 index 000000000..a56d31371 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state.h @@ -0,0 +1,70 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "generated/gl_3_2_core.h" + +class OpenGLState { +public: +    struct { +        bool enabled; // GL_CULL_FACE +        GLenum mode; // GL_CULL_FACE_MODE +    } cull; + +    struct { +        bool test_enabled; // GL_DEPTH_TEST +        GLenum test_func; // GL_DEPTH_FUNC +        GLboolean write_mask; // GL_DEPTH_WRITEMASK +    } depth; + +    struct { +        bool test_enabled; // GL_STENCIL_TEST +        GLenum test_func; // GL_STENCIL_FUNC +        GLint test_ref; // GL_STENCIL_REF +        GLuint test_mask; // GL_STENCIL_VALUE_MASK +        GLuint write_mask; // GL_STENCIL_WRITEMASK +    } stencil; + +    struct { +        bool enabled; // GL_BLEND +        GLenum src_rgb_func; // GL_BLEND_SRC_RGB +        GLenum dst_rgb_func; // GL_BLEND_DST_RGB +        GLenum src_a_func; // GL_BLEND_SRC_ALPHA +        GLenum dst_a_func; // GL_BLEND_DST_ALPHA + +        struct { +            GLclampf red; +            GLclampf green; +            GLclampf blue; +            GLclampf alpha; +        } color; // GL_BLEND_COLOR +    } blend; + +    // 3 texture units - one for each that is used in PICA fragment shader emulation +    struct { +        bool enabled_2d; // GL_TEXTURE_2D +        GLuint texture_2d; // GL_TEXTURE_BINDING_2D +    } texture_units[3]; + +    struct { +        GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING +        GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING +        GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING +        GLuint shader_program; // GL_CURRENT_PROGRAM +    } draw; + +    OpenGLState(); + +    /// Get the currently active OpenGL state +    static const OpenGLState& GetCurState() { +        return cur_state; +    } +     +    /// Apply this state as the current OpenGL state +    const void Apply(); + +private: +    static OpenGLState cur_state; +}; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h new file mode 100644 index 000000000..8369c649e --- /dev/null +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -0,0 +1,105 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +#include "video_core/pica.h" + +#include "generated/gl_3_2_core.h" + +namespace PicaToGL { + +inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { +    static const GLenum wrap_mode_table[] = { +        GL_CLAMP_TO_EDGE,  // WrapMode::ClampToEdge +        0,                 // Unknown +        GL_REPEAT,         // WrapMode::Repeat +        GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat +    }; + +    // Range check table for input +    if (mode >= ARRAY_SIZE(wrap_mode_table)) { +        LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode); +        UNREACHABLE(); + +        return GL_CLAMP_TO_EDGE; +    } + +    GLenum gl_mode = wrap_mode_table[mode]; + +    // Check for dummy values indicating an unknown mode +    if (gl_mode == 0) { +        LOG_CRITICAL(Render_OpenGL, "Unknown texture wrap mode %d", mode); +        UNIMPLEMENTED(); + +        return GL_CLAMP_TO_EDGE; +    } + +    return gl_mode; +} + +inline GLenum BlendFunc(u32 factor) { +    static const GLenum blend_func_table[] = { +        GL_ZERO,                     // BlendFactor::Zero +        GL_ONE,                      // BlendFactor::One +        GL_SRC_COLOR,                // BlendFactor::SourceColor +        GL_ONE_MINUS_SRC_COLOR,      // BlendFactor::OneMinusSourceColor +        GL_DST_COLOR,                // BlendFactor::DestColor +        GL_ONE_MINUS_DST_COLOR,      // BlendFactor::OneMinusDestColor +        GL_SRC_ALPHA,                // BlendFactor::SourceAlpha +        GL_ONE_MINUS_SRC_ALPHA,      // BlendFactor::OneMinusSourceAlpha +        GL_DST_ALPHA,                // BlendFactor::DestAlpha +        GL_ONE_MINUS_DST_ALPHA,      // BlendFactor::OneMinusDestAlpha +        GL_CONSTANT_COLOR,           // BlendFactor::ConstantColor +        GL_ONE_MINUS_CONSTANT_COLOR, // BlendFactor::OneMinusConstantColor +        GL_CONSTANT_ALPHA,           // BlendFactor::ConstantAlpha +        GL_ONE_MINUS_CONSTANT_ALPHA, // BlendFactor::OneMinusConstantAlpha +        GL_SRC_ALPHA_SATURATE,       // BlendFactor::SourceAlphaSaturate +    }; + +    // Range check table for input +    if (factor >= ARRAY_SIZE(blend_func_table)) { +        LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor); +        UNREACHABLE(); + +        return GL_ONE; +    } + +    return blend_func_table[factor]; +} + +inline GLenum CompareFunc(u32 func) { +    static const GLenum compare_func_table[] = { +        GL_NEVER,    // CompareFunc::Never +        GL_ALWAYS,   // CompareFunc::Always +        GL_EQUAL,    // CompareFunc::Equal +        GL_NOTEQUAL, // CompareFunc::NotEqual +        GL_LESS,     // CompareFunc::LessThan +        GL_LEQUAL,   // CompareFunc::LessThanOrEqual +        GL_GREATER,  // CompareFunc::GreaterThan +        GL_GEQUAL,   // CompareFunc::GreaterThanOrEqual +    }; + +    // Range check table for input +    if (func >= ARRAY_SIZE(compare_func_table)) { +        LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); +        UNREACHABLE(); + +        return GL_ALWAYS; +    } + +    return compare_func_table[func]; +} + +inline std::array<GLfloat, 4> ColorRGBA8(const u8* bytes) { +    return { { bytes[0] / 255.0f, +               bytes[1] / 255.0f, +               bytes[2] / 255.0f, +               bytes[3] / 255.0f +           } }; +} + +} // namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 71ceb021b..16cf92e20 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -53,6 +53,7 @@ static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const  /// RendererOpenGL constructor  RendererOpenGL::RendererOpenGL() { +    hw_rasterizer.reset(new RasterizerOpenGL());      resolution_width  = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth);      resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight;  } @@ -63,7 +64,9 @@ RendererOpenGL::~RendererOpenGL() {  /// Swap buffers (render frame)  void RendererOpenGL::SwapBuffers() { -    render_window->MakeCurrent(); +    // Maintain the rasterizer's state as a priority +    OpenGLState prev_state = OpenGLState::GetCurState(); +    state.Apply();      for(int i : {0, 1}) {          const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; @@ -110,7 +113,19 @@ void RendererOpenGL::SwapBuffers() {      render_window->PollEvents();      render_window->SwapBuffers(); +    prev_state.Apply(); +      profiler.BeginFrame(); + +    bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; +    if (Settings::values.use_hw_renderer != hw_renderer_enabled) { +        // TODO: Save new setting value to config file for next startup +        Settings::values.use_hw_renderer = hw_renderer_enabled; + +        if (Settings::values.use_hw_renderer) { +            hw_rasterizer->Reset(); +        } +    }  }  /** @@ -139,7 +154,11 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&      // only allows rows to have a memory alignement of 4.      ASSERT(pixel_stride % 4 == 0); -    glBindTexture(GL_TEXTURE_2D, texture.handle); +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.handle; +    state.Apply(); +     +    glActiveTexture(GL_TEXTURE0);      glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);      // Update existing texture @@ -151,7 +170,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&                      texture.gl_format, texture.gl_type, framebuffer_data);      glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -    glBindTexture(GL_TEXTURE_2D, 0);  }  /** @@ -161,13 +179,15 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&   */  void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,                                                  const TextureInfo& texture) { -    glBindTexture(GL_TEXTURE_2D, texture.handle); +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.handle; +    state.Apply(); +    glActiveTexture(GL_TEXTURE0);      u8 framebuffer_data[3] = { color_r, color_g, color_b };      // Update existing texture      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); -    glBindTexture(GL_TEXTURE_2D, 0);  }  /** @@ -175,7 +195,6 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color   */  void RendererOpenGL::InitOpenGLObjects() {      glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); -    glDisable(GL_DEPTH_TEST);      // Link shaders and get variable locations      program_id = ShaderUtil::LoadShaders(GLShaders::g_vertex_shader, GLShaders::g_fragment_shader); @@ -189,10 +208,12 @@ void RendererOpenGL::InitOpenGLObjects() {      // Generate VAO      glGenVertexArrays(1, &vertex_array_handle); -    glBindVertexArray(vertex_array_handle); + +    state.draw.vertex_array = vertex_array_handle; +    state.draw.vertex_buffer = vertex_buffer_handle; +    state.Apply();      // Attach vertex data to VAO -    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle);      glBufferData(GL_ARRAY_BUFFER, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);      glVertexAttribPointer(attrib_position,  2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, position));      glVertexAttribPointer(attrib_tex_coord, 2, GL_FLOAT, GL_FALSE, sizeof(ScreenRectVertex), (GLvoid*)offsetof(ScreenRectVertex, tex_coord)); @@ -206,14 +227,19 @@ void RendererOpenGL::InitOpenGLObjects() {          // Allocation of storage is deferred until the first frame, when we          // know the framebuffer size. -        glBindTexture(GL_TEXTURE_2D, texture.handle); +        state.texture_units[0].enabled_2d = true; +        state.texture_units[0].texture_2d = texture.handle; +        state.Apply(); + +        glActiveTexture(GL_TEXTURE0);          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);          glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);      } -    glBindTexture(GL_TEXTURE_2D, 0); + +    hw_rasterizer->InitObjects();  }  void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -264,7 +290,11 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,          UNIMPLEMENTED();      } -    glBindTexture(GL_TEXTURE_2D, texture.handle); +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.handle; +    state.Apply(); + +    glActiveTexture(GL_TEXTURE0);      glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,              texture.gl_format, texture.gl_type, nullptr);  } @@ -280,8 +310,10 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x          ScreenRectVertex(x+w, y+h, 0.f, 1.f),      }; -    glBindTexture(GL_TEXTURE_2D, texture.handle); -    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_handle); +    state.texture_units[0].enabled_2d = true; +    state.texture_units[0].texture_2d = texture.handle; +    state.Apply(); +      glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());      glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);  } @@ -295,7 +327,8 @@ void RendererOpenGL::DrawScreens() {      glViewport(0, 0, layout.width, layout.height);      glClear(GL_COLOR_BUFFER_BIT); -    glUseProgram(program_id); +    state.draw.shader_program = program_id; +    state.Apply();      // Set projection matrix      std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cd782428e..2ec2e14ca 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -13,6 +13,8 @@  #include "core/hw/gpu.h"  #include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_rasterizer.h"  class EmuWindow; @@ -49,18 +51,18 @@ private:      };      void InitOpenGLObjects(); -    static void ConfigureFramebufferTexture(TextureInfo& texture, -                                            const GPU::Regs::FramebufferConfig& framebuffer); +    void ConfigureFramebufferTexture(TextureInfo& texture, +                                     const GPU::Regs::FramebufferConfig& framebuffer);      void DrawScreens();      void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h);      void UpdateFramerate();      // Loads framebuffer from emulated memory into the active OpenGL texture. -    static void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, -                                        const TextureInfo& texture); +    void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, +                                 const TextureInfo& texture);      // Fills active OpenGL texture with the given RGB color. -    static void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, -                                           const TextureInfo& texture); +    void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, +                                    const TextureInfo& texture);      /// Computes the viewport rectangle      MathUtil::Rectangle<unsigned> GetViewportExtent(); @@ -71,6 +73,8 @@ private:      int resolution_width;                         ///< Current resolution width      int resolution_height;                        ///< Current resolution height +    OpenGLState state; +      // OpenGL object IDs      GLuint vertex_array_handle;      GLuint vertex_buffer_handle; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 42e3bdd5b..d4d907d5e 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -6,6 +6,7 @@  #include "common/emu_window.h"  #include "core/core.h" +#include "core/settings.h"  #include "video_core/video_core.h"  #include "video_core/renderer_base.h" @@ -19,6 +20,8 @@ namespace VideoCore {  EmuWindow*      g_emu_window    = nullptr;     ///< Frontend emulator window  RendererBase*   g_renderer      = nullptr;     ///< Renderer plugin +std::atomic<bool> g_hw_renderer_enabled; +  /// Initialize the video core  void Init(EmuWindow* emu_window) {      g_emu_window = emu_window; diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index f885bec21..3f24df7bd 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -8,6 +8,8 @@  #include "renderer_base.h" +#include <atomic> +  ////////////////////////////////////////////////////////////////////////////////////////////////////  // Video Core namespace @@ -31,6 +33,9 @@ static const int kScreenBottomHeight    = 240;  ///< 3DS bottom screen height  extern RendererBase*   g_renderer;              ///< Renderer plugin  extern EmuWindow*      g_emu_window;            ///< Emu window +// TODO: Wrap this in a user settings struct along with any other graphics settings (often set from qt ui) +extern std::atomic<bool> g_hw_renderer_enabled; +  /// Start the video core  void Start(); | 
