From e542356d0cc37b61621da8d2b376d32407ec8eff Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 01:27:12 -0400 Subject: gl_shader_decompiler: Let OpenGL interpret floats. - Accuracy is lost in translation to string, e.g. with NaN. - Needed for Super Mario Odyssey. --- src/video_core/engines/shader_bytecode.h | 13 ++++--------- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7e3fb4b1..0d33c5a5e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -254,20 +254,15 @@ union Instruction { BitField<56, 1, u64> invert_b; } lop32i; - float GetImm20_19() const { - float result{}; + u32 GetImm20_19() const { u32 imm{static_cast(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + return imm; } - float GetImm20_32() const { - float result{}; - s32 imm{static_cast(imm20_32)}; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + u32 GetImm20_32() const { + return static_cast(imm20_32); } s32 GetSignedImm20_20() const { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e3217db81..1ff71d682 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -602,12 +602,12 @@ private: /// Generates code representing a 19-bit immediate value static std::string GetImmediate19(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_19()); + return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); } /// Generates code representing a 32-bit immediate value static std::string GetImmediate32(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_32()); + return 
fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); } /// Generates code representing a texture sampler. -- cgit v1.2.3 From aaf8d9ac2f0ec6de5f0393cf5935481143c184bf Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 01:22:48 -0400 Subject: gl_rasterizer_cached: Implement RenderTargetFormat::B5G6R5_UNORM. - Used by Super Mario Odyssey. --- src/video_core/gpu.h | 1 + src/video_core/renderer_opengl/gl_rasterizer_cache.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 440505c9d..874eddd78 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 { RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, R32_FLOAT = 0xE5, + B5G6R5_UNORM = 0xE8, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0c6652c7a..4168129f9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -221,6 +221,8 @@ struct SurfaceParams { return PixelFormat::RG32F; case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + return PixelFormat::B5G6R5; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: @@ -441,6 +443,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::B5G6R5_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RG16_SNORM: return ComponentType::SNorm; -- cgit v1.2.3 From c120ed7d188f0d4160d7f5157edfb07f358d0ddc Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 01:09:44 -0400 Subject: maxwell_to_gl: Implement VertexAttribute::Size::Size_8_8. 
--- src/video_core/renderer_opengl/maxwell_to_gl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 16b1bd606..500d4d4b1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -27,6 +27,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: -- cgit v1.2.3 From 8c6338b6f94543ba564a2ed3458fab50731d6c3b Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 01:08:27 -0400 Subject: renderer_opengl: Use trace log in a few places. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c2a931469..b87b87e03 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -161,7 +161,7 @@ std::pair RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, // assume every shader uses them all. 
for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; - LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bf9131193..899865e3b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -430,7 +430,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); + LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); break; } } -- cgit v1.2.3 From 57982df105a6d149cc82292541184e6ceabc288c Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 02:07:44 -0400 Subject: maxwell_3d: Use correct const buffer size and check bounds. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fixes mem corruption with Super Mario Odyssey and Pokkén Tournament DX. 
--- src/video_core/engines/maxwell_3d.cpp | 2 ++ src/video_core/engines/maxwell_3d.h | 2 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 ++++- src/video_core/renderer_opengl/gl_state.h | 6 +++++- 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5c0ae8009..ed22a2090 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -238,6 +238,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { auto& buffer = shader.const_buffers[bind_data.index]; + ASSERT(bind_data.index < Regs::MaxConstBuffers); + buffer.enabled = bind_data.valid.Value() != 0; buffer.index = bind_data.index; buffer.address = regs.const_buffer.BufferAddress(); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4d0ff96a5..0506ac8fe 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -44,7 +44,7 @@ public: static constexpr size_t MaxShaderProgram = 6; static constexpr size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. 
- static constexpr size_t MaxConstBuffers = 16; + static constexpr size_t MaxConstBuffers = 18; enum class QueryMode : u32 { Write = 0, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c2a931469..601a1084b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -659,7 +659,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr auto& buffer_draw_state = state.draw.const_buffers[static_cast(stage)][used_buffer.GetIndex()]; - ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); + if (!buffer.enabled) { + continue; + } + buffer_draw_state.enabled = true; buffer_draw_state.bindpoint = current_bindpoint + bindpoint; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 24b1d956b..5c7b636e4 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -7,6 +7,10 @@ #include #include +#include "video_core/engines/maxwell_3d.h" + +using Regs = Tegra::Engines::Maxwell3D::Regs; + namespace TextureUnits { struct TextureUnit { @@ -120,7 +124,7 @@ public: GLuint bindpoint; GLuint ssbo; }; - std::array, 5> const_buffers{}; + std::array, 5> const_buffers; } draw; struct { -- cgit v1.2.3 From 7f0d0a93f74c4a1a76281ccdd4b985e50b89d440 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 02:25:00 -0400 Subject: gl_shader_decompiler: Stub input attribute Unknown_63. 
--- src/video_core/engines/shader_bytecode.h | 2 ++ src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7e3fb4b1..42147588c 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -78,6 +78,8 @@ union Attribute { // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. TessCoordInstanceIDVertexID = 47, + // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. + Unknown_63 = 63, }; union { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e3217db81..724512000 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -523,6 +523,11 @@ private: // shader. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))"; + case Attribute::Index::Unknown_63: + // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this. + LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63"); + UNREACHABLE(); + break; default: const u32 index{static_cast(attribute) - static_cast(Attribute::Index::Attribute_0)}; @@ -534,6 +539,8 @@ private: LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); UNREACHABLE(); } + + return "vec4(0, 0, 0, 0)"; } /// Generates code representing an output attribute register. -- cgit v1.2.3 From 7bf422d58c49e6d52018f622ac79f09b90c853a4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 02:40:04 -0400 Subject: gpu: Add R11G11B10_FLOAT to RenderTargetBytesPerPixel. - Used by Super Mario Odyssey. 
--- src/video_core/gpu.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b2a83ce0b..4ff4d71c5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -42,6 +42,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: case RenderTargetFormat::R32_FLOAT: + case RenderTargetFormat::R11G11B10_FLOAT: return 4; default: UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast(format)); -- cgit v1.2.3 From ddec200290a4e6a4e4613ccf306a2d68e6e29707 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 02:35:00 -0400 Subject: gl_rasterizer: Do not render when no render target is configured. - Used by Super Mario Odyssey. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b87b87e03..039e9e0ca 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -324,6 +324,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers(bool using_c bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { + LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured"); + using_color_fb = false; + } + // TODO(bunnei): Implement this const bool has_stencil = false; -- cgit v1.2.3 From 557c4669945fa7a30d3c6af25ce383d507232a7e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 8 Aug 2018 23:14:54 -0400 Subject: gl_rasterizer_cache: Make pointer const in LoadGLBuffer() This is only ever read from, so we can make the data it's pointing to const. 
--- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 257aa9571..ecc84293e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -447,7 +447,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer() { ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); + const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); ASSERT(texture_src_data); -- cgit v1.2.3 From efe6b473c5b16d57e0bc6535e43fdafff23e6438 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 23:22:45 -0400 Subject: maxwell_3d: Ignore macros that have not been uploaded yet. - Used by Super Mario Odyssey (in game). --- src/video_core/engines/maxwell_3d.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ed22a2090..a46ed4bd7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { - auto macro_code = uploaded_macros.find(method); + // Reset the current macro. + executing_macro = 0; + // The requested macro must have been uploaded already. 
- ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); + auto macro_code = uploaded_macros.find(method); + if (macro_code == uploaded_macros.end()) { + LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + return; + } - // Reset the current macro and execute it. - executing_macro = 0; + // Execute the current macro. macro_interpreter.Execute(macro_code->second, std::move(parameters)); } -- cgit v1.2.3 From 4283019aa0928f8bf564b0031c21b1231e08f8e2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 21:51:09 -0400 Subject: gl_shader_decompiler: Declare predicates on use. - Used by Super Mario Odyssey (when going in game). --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index dd240a4ce..ea7779429 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -657,16 +657,17 @@ private: * @param instr Instruction to generate the if condition for. * @returns string containing the predicate condition. */ - std::string GetPredicateCondition(u64 index, bool negate) const { + std::string GetPredicateCondition(u64 index, bool negate) { using Tegra::Shader::Pred; std::string variable; // Index 7 is used as an 'Always True' condition. - if (index == static_cast(Pred::UnusedIndex)) + if (index == static_cast(Pred::UnusedIndex)) { variable = "true"; - else + } else { variable = 'p' + std::to_string(index) + '_' + suffix; - + declr_predicates.insert(variable); + } if (negate) { return "!(" + variable + ')'; } -- cgit v1.2.3 From 06d0b96ca9b25b26f59e965e23e2cc7491c6ce66 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 22:16:27 -0400 Subject: maxwell_to_gl: Implement PrimitiveTopology::Points. 
- Used by Super Mario Odyssey (in game). --- src/video_core/renderer_opengl/maxwell_to_gl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 500d4d4b1..f57464981 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -85,6 +85,8 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return GL_POINTS; case Maxwell::PrimitiveTopology::Triangles: return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: -- cgit v1.2.3 From dfc3eed0cbbf86af0e4d644b8a444f6ea29a1914 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 8 Aug 2018 22:07:16 -0400 Subject: maxwell_to_gl: Implement VertexAttribute::Size::Size_16_16_16_16. - Used by Super Mario Odyssey (in game). 
--- src/video_core/renderer_opengl/maxwell_to_gl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f57464981..43be69dd1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -31,6 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; -- cgit v1.2.3 From 434f352eb37fba2a5c80bead61a8c5593785730d Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 8 Aug 2018 23:28:01 -0400 Subject: gl_rasterizer_cache: Use std::vector::assign in LoadGLBuffer() for the non-tiled case resize() causes the vector to expand and zero out the added members to the vector, however we can avoid this zeroing by using assign(). Given we have the pointer to the data we want to copy, we can calculate the end pointer and directly copy the range of data without the need to perform the resize() beforehand. 
--- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ecc84293e..9efb5cea4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -451,16 +451,18 @@ void CachedSurface::LoadGLBuffer() { ASSERT(texture_src_data); - gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); + const u32 copy_size = params.width * params.height * bytes_per_pixel; MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); if (!params.is_tiled) { - const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; + const u8* const texture_src_data_end = texture_src_data + copy_size; - std::memcpy(gl_buffer.data(), texture_src_data, - bytes_per_pixel * params.width * params.height); + gl_buffer.assign(texture_src_data, texture_src_data_end); } else { + gl_buffer.resize(copy_size); + morton_to_gl_fns[static_cast(params.pixel_format)]( params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } -- cgit v1.2.3 From e831b80d699b5912597c572f197a879bcdfab45a Mon Sep 17 00:00:00 2001 From: Lioncash Date: Wed, 8 Aug 2018 23:30:53 -0400 Subject: gl_rasterizer_cache: Invert conditional in LoadGLBuffer() It's generally easier to follow code using conditionals that operate in terms of the true case followed by the false case (no chance of overlooking the exclamation mark). 
--- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 9efb5cea4..9b202e5c3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -456,15 +456,15 @@ void CachedSurface::LoadGLBuffer() { MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - if (!params.is_tiled) { - const u8* const texture_src_data_end = texture_src_data + copy_size; - - gl_buffer.assign(texture_src_data, texture_src_data_end); - } else { + if (params.is_tiled) { gl_buffer.resize(copy_size); morton_to_gl_fns[static_cast(params.pixel_format)]( params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + } else { + const u8* const texture_src_data_end = texture_src_data + copy_size; + + gl_buffer.assign(texture_src_data, texture_src_data_end); } ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); -- cgit v1.2.3 From 5cb6eceecfe1b631f8da5b41559076c8a880e26a Mon Sep 17 00:00:00 2001 From: Khangaroo Date: Thu, 9 Aug 2018 12:57:13 -0400 Subject: Implement BC5/DXN2 (#996) - Used by Kirby Star Allies. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 26 ++++++------ .../renderer_opengl/gl_rasterizer_cache.h | 49 ++++++++++++---------- src/video_core/textures/decoders.cpp | 3 ++ 3 files changed, 45 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 257aa9571..2e68dab11 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -109,6 +109,7 @@ static constexpr std::array tex_form {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN2 {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 @@ -218,17 +219,17 @@ static constexpr std::array, MortonCopy, MortonCopy, MortonCopy, MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, }; static constexpr std::array, MortonCopy, MortonCopy, - // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported + // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not + // 
supported nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 4168129f9..6f01b2bf0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -35,31 +35,32 @@ struct SurfaceParams { DXT23 = 9, DXT45 = 10, DXN1 = 11, // This is also known as BC4 - BC7U = 12, - ASTC_2D_4X4 = 13, - G8R8 = 14, - BGRA8 = 15, - RGBA32F = 16, - RG32F = 17, - R32F = 18, - R16F = 19, - R16UNORM = 20, - RG16 = 21, - RG16F = 22, - RG16UI = 23, - RG16I = 24, - RG16S = 25, - RGB32F = 26, - SRGBA8 = 27, + DXN2 = 12, // This is also known as BC5 + BC7U = 13, + ASTC_2D_4X4 = 14, + G8R8 = 15, + BGRA8 = 16, + RGBA32F = 17, + RG32F = 18, + R32F = 19, + R16F = 20, + R16UNORM = 21, + RG16 = 22, + RG16F = 23, + RG16UI = 24, + RG16I = 25, + RG16S = 26, + RGB32F = 27, + SRGBA8 = 28, MaxColorFormat, // DepthStencil formats - Z24S8 = 28, - S8Z24 = 29, - Z32F = 30, - Z16 = 31, - Z32FS8 = 32, + Z24S8 = 29, + S8Z24 = 30, + Z32F = 31, + Z16 = 32, + Z32FS8 = 33, MaxDepthStencilFormat, @@ -109,6 +110,7 @@ struct SurfaceParams { 4, // DXT23 4, // DXT45 4, // DXN1 + 4, // DXN2 4, // BC7U 4, // ASTC_2D_4X4 1, // G8R8 @@ -153,6 +155,7 @@ struct SurfaceParams { 128, // DXT23 128, // DXT45 64, // DXN1 + 128, // DXN2 128, // BC7U 32, // ASTC_2D_4X4 16, // G8R8 @@ -305,6 +308,8 @@ struct SurfaceParams { return PixelFormat::DXT45; case Tegra::Texture::TextureFormat::DXN1: return PixelFormat::DXN1; + case Tegra::Texture::TextureFormat::DXN2: + return PixelFormat::DXN2; case Tegra::Texture::TextureFormat::BC7U: return PixelFormat::BC7U; case Tegra::Texture::TextureFormat::ASTC_2D_4X4: @@ -362,6 +367,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::DXT45; case PixelFormat::DXN1: return Tegra::Texture::TextureFormat::DXN1; + case PixelFormat::DXN2: + return Tegra::Texture::TextureFormat::DXN2; case PixelFormat::BC7U: return 
Tegra::Texture::TextureFormat::BC7U; case PixelFormat::ASTC_2D_4X4: diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 65db84ad3..7ea66584c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) { return 8; case TextureFormat::DXT23: case TextureFormat::DXT45: + case TextureFormat::DXN2: case TextureFormat::BC7U: // In this case a 'pixel' actually refers to a 4x4 tile. return 16; @@ -113,6 +114,7 @@ std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::DXT23: case TextureFormat::DXT45: case TextureFormat::DXN1: + case TextureFormat::DXN2: case TextureFormat::BC7U: // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel // values. @@ -179,6 +181,7 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat case TextureFormat::DXT23: case TextureFormat::DXT45: case TextureFormat::DXN1: + case TextureFormat::DXN2: case TextureFormat::BC7U: case TextureFormat::ASTC_2D_4X4: case TextureFormat::A8R8G8B8: -- cgit v1.2.3 From 59ea37daa7b822f26737bface58a050ed2899fec Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 9 Aug 2018 15:24:17 -0400 Subject: gl_rasterizer_cache: Avoid iterator invalidation issues within InvalidateRegion() A range-based for loop can't be used when the container being iterated is also being erased from. 
--- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 8b6d1b89d..c447e999c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -761,10 +761,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size* } void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { - for (const auto& pair : surface_cache) { - const auto& surface{pair.second}; + for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { + const auto& surface{iter->second}; const auto& params{surface->GetSurfaceParams()}; + ++iter; + if (params.IsOverlappingRegion(addr, size)) { UnregisterSurface(surface); } -- cgit v1.2.3 From 6ef027b958471dbd67dadbad5838f9cd246177ae Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 9 Aug 2018 17:29:09 -0400 Subject: gl_shader_decompiler: Reserve element memory beforehand in BuildRegisterList() Avoids potentially performing multiple reallocations when we know the total amount of memory we need beforehand. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ea7779429..32f06f409 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -507,6 +507,8 @@ private: /// Build the GLSL register list.
void BuildRegisterList() { + regs.reserve(Register::NumRegisters); + for (size_t index = 0; index < Register::NumRegisters; ++index) { regs.emplace_back(index, suffix); } -- cgit v1.2.3 From 75e12a33ae0479fed08d4f7dfe0ec95bf222084a Mon Sep 17 00:00:00 2001 From: Khangaroo Date: Thu, 9 Aug 2018 19:15:32 -0400 Subject: Implement SNORM for BC5/DXN2 (#998) * Implement BC5/DXN2 (#996) - Used by Kirby Star Allies. * Implement BC5/DXN2 SNORM UNORM for Kirby Star Allies SNORM for Super Mario Odyssey --- .../renderer_opengl/gl_rasterizer_cache.cpp | 29 ++++++---- .../renderer_opengl/gl_rasterizer_cache.h | 64 +++++++++++++--------- 2 files changed, 55 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index c447e999c..f6efce818 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -109,7 +109,9 @@ static constexpr std::array tex_form {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN2 + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 @@ -219,17 +221,18 @@ static constexpr std::array, MortonCopy, MortonCopy, MortonCopy, MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, 
- MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, - MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, MortonCopy, + MortonCopy, }; static constexpr std::array, MortonCopy, MortonCopy, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 6f01b2bf0..26e2ee203 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -35,32 +35,33 @@ struct SurfaceParams { DXT23 = 9, DXT45 = 10, DXN1 = 11, // This is also known as BC4 - DXN2 = 12, // This is also known as BC5 - BC7U = 13, - ASTC_2D_4X4 = 14, - G8R8 = 15, - BGRA8 = 16, - RGBA32F = 17, - RG32F = 18, - R32F = 19, - R16F = 20, - R16UNORM = 21, - RG16 = 22, - RG16F = 23, - RG16UI = 24, - RG16I = 25, - RG16S = 26, - RGB32F = 27, - SRGBA8 = 28, + DXN2UNORM = 12, + DXN2SNORM = 13, + BC7U = 14, + ASTC_2D_4X4 = 15, + G8R8 = 16, + BGRA8 = 17, + RGBA32F = 18, + RG32F = 19, + R32F = 20, + R16F = 21, + R16UNORM = 22, + RG16 = 23, + RG16F = 24, + RG16UI = 25, + RG16I = 26, + RG16S = 27, + RGB32F = 28, + SRGBA8 = 29, MaxColorFormat, // DepthStencil formats - Z24S8 = 29, - S8Z24 = 30, - Z32F = 31, - Z16 = 32, - Z32FS8 = 33, + Z24S8 = 30, + S8Z24 = 31, + Z32F = 32, + Z16 = 33, + Z32FS8 = 34, MaxDepthStencilFormat, @@ -110,7 +111,8 @@ struct SurfaceParams { 4, // DXT23 4, // DXT45 4, // DXN1 - 4, // DXN2 + 4, // DXN2UNORM + 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 1, // G8R8 @@ -155,7 +157,8 @@ struct SurfaceParams { 128, // DXT23 128, // DXT45 64, // DXN1 - 128, // DXN2 + 128, // DXN2UNORM + 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 16, // G8R8 @@ -309,7 +312,15 @@ struct SurfaceParams 
{ case Tegra::Texture::TextureFormat::DXN1: return PixelFormat::DXN1; case Tegra::Texture::TextureFormat::DXN2: - return PixelFormat::DXN2; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::DXN2UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::DXN2SNORM; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BC7U: return PixelFormat::BC7U; case Tegra::Texture::TextureFormat::ASTC_2D_4X4: @@ -367,7 +378,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::DXT45; case PixelFormat::DXN1: return Tegra::Texture::TextureFormat::DXN1; - case PixelFormat::DXN2: + case PixelFormat::DXN2UNORM: + case PixelFormat::DXN2SNORM: return Tegra::Texture::TextureFormat::DXN2; case PixelFormat::BC7U: return Tegra::Texture::TextureFormat::BC7U; -- cgit v1.2.3 From e8c52d4c895cc4ab7a4d8962112323c9d15e922c Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 9 Aug 2018 00:30:02 -0400 Subject: gl_rasterizer_cache: Add bounds checking for gl_buffer copies. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f6efce818..114d35ce6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -184,35 +184,37 @@ MathUtil::Rectangle SurfaceParams::GetRect() const { } template -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { +void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector& gl_buffer, + Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { + std::vector data; if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { - auto data = Tegra::Texture::UnswizzleTexture( + data = Tegra::Texture::UnswizzleTexture( *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); } else { - auto data = Tegra::Texture::UnswizzleDepthTexture( + data = Tegra::Texture::UnswizzleDepthTexture( *gpu.memory_manager->GpuToCpuAddress(addr), SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); } + const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; + gl_buffer.assign(data.begin(), data.begin() + size_to_copy); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). 
We should // check the configuration for this and perform more generic un/swizzle LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(), morton_to_gl); } } -static constexpr std::array&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy, MortonCopy, @@ -235,7 +237,7 @@ static constexpr std::array, }; -static constexpr std::array&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy, @@ -467,7 +469,7 @@ void CachedSurface::LoadGLBuffer() { gl_buffer.resize(copy_size); morton_to_gl_fns[static_cast(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); } else { const u8* const texture_src_data_end = texture_src_data + copy_size; @@ -494,7 +496,7 @@ void CachedSurface::FlushGLBuffer() { std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { gl_to_morton_fns[static_cast(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); } } -- cgit v1.2.3 From 3a67876252d616e1221e1a83b2dbe387993ad124 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 9 Aug 2018 20:17:48 -0400 Subject: textures: Refactor out for Texture/Depth FormatFromPixelFormat. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 31 +++++--- .../renderer_opengl/gl_rasterizer_cache.h | 86 ---------------------- src/video_core/textures/decoders.cpp | 85 +-------------------- src/video_core/textures/decoders.h | 4 +- 4 files changed, 27 insertions(+), 179 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 114d35ce6..885403cd0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -183,6 +183,21 @@ MathUtil::Rectangle SurfaceParams::GetRect() const { return {0, actual_height, width, 0}; } +/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN +static bool IsFormatBCn(PixelFormat format) { + switch (format) { + case PixelFormat::DXT1: + case PixelFormat::DXT23: + case PixelFormat::DXT45: + case PixelFormat::DXN1: + case PixelFormat::DXN2SNORM: + case PixelFormat::DXN2UNORM: + case PixelFormat::BC7U: + return true; + } + return false; +} + template void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector& gl_buffer, Tegra::GPUVAddr addr) { @@ -191,16 +206,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector& gl_bu const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { - std::vector data; - if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { - data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - } else { - data = Tegra::Texture::UnswizzleDepthTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); - } + // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual + // pixel values. 
+ const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; + const std::vector data = + Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, + bytes_per_pixel, stride, height, block_height); const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; gl_buffer.assign(data.begin(), data.begin() + size_to_copy); } else { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 26e2ee203..36213c403 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -348,92 +348,6 @@ struct SurfaceParams { } } - static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { - // TODO(Subv): Properly implement this - switch (format) { - case PixelFormat::ABGR8: - case PixelFormat::SRGBA8: - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::B5G6R5: - return Tegra::Texture::TextureFormat::B5G6R5; - case PixelFormat::A2B10G10R10: - return Tegra::Texture::TextureFormat::A2B10G10R10; - case PixelFormat::A1B5G5R5: - return Tegra::Texture::TextureFormat::A1B5G5R5; - case PixelFormat::R8: - return Tegra::Texture::TextureFormat::R8; - case PixelFormat::G8R8: - return Tegra::Texture::TextureFormat::G8R8; - case PixelFormat::RGBA16F: - return Tegra::Texture::TextureFormat::R16_G16_B16_A16; - case PixelFormat::R11FG11FB10F: - return Tegra::Texture::TextureFormat::BF10GF11RF11; - case PixelFormat::RGBA32UI: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::DXT1: - return Tegra::Texture::TextureFormat::DXT1; - case PixelFormat::DXT23: - return Tegra::Texture::TextureFormat::DXT23; - case PixelFormat::DXT45: - return Tegra::Texture::TextureFormat::DXT45; - case PixelFormat::DXN1: - return Tegra::Texture::TextureFormat::DXN1; - case PixelFormat::DXN2UNORM: - case PixelFormat::DXN2SNORM: - return Tegra::Texture::TextureFormat::DXN2; - case 
PixelFormat::BC7U: - return Tegra::Texture::TextureFormat::BC7U; - case PixelFormat::ASTC_2D_4X4: - return Tegra::Texture::TextureFormat::ASTC_2D_4X4; - case PixelFormat::BGRA8: - // TODO(bunnei): This is fine for unswizzling (since we just need the right component - // sizes), but could be a bug if we used this function in different ways. - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::RGBA32F: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::RGB32F: - return Tegra::Texture::TextureFormat::R32_G32_B32; - case PixelFormat::RG32F: - return Tegra::Texture::TextureFormat::R32_G32; - case PixelFormat::R32F: - return Tegra::Texture::TextureFormat::R32; - case PixelFormat::R16F: - case PixelFormat::R16UNORM: - return Tegra::Texture::TextureFormat::R16; - case PixelFormat::Z32F: - return Tegra::Texture::TextureFormat::ZF32; - case PixelFormat::Z24S8: - return Tegra::Texture::TextureFormat::Z24S8; - case PixelFormat::RG16F: - case PixelFormat::RG16: - case PixelFormat::RG16UI: - case PixelFormat::RG16I: - case PixelFormat::RG16S: - return Tegra::Texture::TextureFormat::R16_G16; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); - UNREACHABLE(); - } - } - - static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { - switch (format) { - case PixelFormat::S8Z24: - return Tegra::DepthFormat::S8_Z24_UNORM; - case PixelFormat::Z24S8: - return Tegra::DepthFormat::Z24_S8_UNORM; - case PixelFormat::Z32F: - return Tegra::DepthFormat::Z32_FLOAT; - case PixelFormat::Z16: - return Tegra::DepthFormat::Z16_UNORM; - case PixelFormat::Z32FS8: - return Tegra::DepthFormat::Z32_S8_X24_FLOAT; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast(format)); - UNREACHABLE(); - } - } - static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { // TODO(Subv): Implement more component types switch (type) { diff --git a/src/video_core/textures/decoders.cpp 
b/src/video_core/textures/decoders.cpp index 7ea66584c..70746a34e 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -86,88 +86,11 @@ u32 BytesPerPixel(TextureFormat format) { } } -static u32 DepthBytesPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::Z16_UNORM: - return 2; - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - return 4; - case DepthFormat::Z32_S8_X24_FLOAT: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } -} - -std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = BytesPerPixel(format); - - std::vector unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::DXN2: - case TextureFormat::BC7U: - // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel - // values. 
- CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::BF10GF11RF11: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::R32_G32_B32: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return unswizzled_data; -} - -std::vector UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = DepthBytesPerPixel(format); - +std::vector UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height) { std::vector unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case DepthFormat::Z16_UNORM: - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - case DepthFormat::Z32_S8_X24_FLOAT: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - + CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, + Memory::GetPointer(address), unswizzled_data.data(), true, block_height); return unswizzled_data; } diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 73a4924d1..1f7b731be 100644 --- 
a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -13,8 +13,8 @@ namespace Tegra::Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height = TICEntry::DefaultBlockHeight); +std::vector UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height = TICEntry::DefaultBlockHeight); /** * Unswizzles a swizzled depth texture without changing its format. -- cgit v1.2.3 From 0e1510ac2923eee590db38350ae7061c30516586 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Thu, 9 Aug 2018 20:54:04 -0400 Subject: gl_rasterizer_cache: Remove unused viewport parameter of GetFramebufferSurfaces() --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 7 +++---- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 4 ++-- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 3 +-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 546e86532..51e50cbcf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -332,8 +332,6 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers(bool using_c // TODO(bunnei): Implement this const bool has_stencil = false; - const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; - const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; @@ -346,9 +344,10 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, 
using_depth_fb, viewport_rect); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); - MathUtil::Rectangle draw_rect{ + const MathUtil::Rectangle viewport_rect{regs.viewport_transform[0].GetRect()}; + const MathUtil::Rectangle draw_rect{ static_cast(std::clamp(static_cast(surfaces_rect.left) + viewport_rect.left, surfaces_rect.left, surfaces_rect.right)), // Left static_cast(std::clamp(static_cast(surfaces_rect.bottom) + viewport_rect.top, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index f6efce818..04ebdbaf9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -609,8 +609,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu return GetSurface(SurfaceParams::CreateForTexture(config)); } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle& viewport) { +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 26e2ee203..8a0ba64e8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -634,8 +634,7 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle& viewport); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); 
/// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); -- cgit v1.2.3 From 2156cb3cbedd5c684bafff4500d20868969bc167 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 10 Aug 2018 10:39:46 -0400 Subject: Revert "gl_state: Temporarily disable culling and depth test." --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 546e86532..bed14d5fe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -812,9 +812,7 @@ void RasterizerOpenGL::SyncClipCoef() { void RasterizerOpenGL::SyncCullMode() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions - // state.cull.enabled = regs.cull.enabled != 0; - state.cull.enabled = false; + state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); -- cgit v1.2.3 From a5b65df9cf0cd817585c2ccdb744c62be25cb916 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 10 Aug 2018 11:45:23 -0400 Subject: maxwell_to_gl: Implement VertexAttribute::Size::Size_32_32_32. - Used by Super Mario Odyssey. 
--- src/video_core/renderer_opengl/maxwell_to_gl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 43be69dd1..d7345e8a4 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -45,6 +45,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return GL_INT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: -- cgit v1.2.3 From 6b0bc48a427043d887722b392c2e258d74134f4e Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 10 Aug 2018 12:17:49 -0400 Subject: maxwell_to_gl: Implement VertexAttribute::Size::Size_8_8. - Used by Super Mario Odyssey. --- src/video_core/renderer_opengl/maxwell_to_gl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index d7345e8a4..c439446b1 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -47,6 +47,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_32_32_32: return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: -- cgit v1.2.3