14 files changed, 133 insertions, 71 deletions
diff --git a/src/common/color.h b/src/common/color.h
index 24a445dac..0379040be 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <cstring>
+
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "common/vector_math.h"
@@ -55,7 +57,7 @@ constexpr u8 Convert8To6(u8 value) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
     return {bytes[3], bytes[2], bytes[1], bytes[0]};
 }
 
@@ -64,7 +66,7 @@ inline const Math::Vec4<u8> DecodeRGBA8(const u8* bytes) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
     return {bytes[2], bytes[1], bytes[0], 255};
 }
 
@@ -73,7 +75,7 @@ inline const Math::Vec4<u8> DecodeRGB8(const u8* bytes) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRG8(const u8* bytes) {
+inline Math::Vec4<u8> DecodeRG8(const u8* bytes) {
     return {bytes[1], bytes[0], 0, 255};
 }
 
@@ -82,8 +84,9 @@ inline const Math::Vec4<u8> DecodeRG8(const u8* bytes) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
-    const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes);
+inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
+    u16_le pixel;
+    std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
             Convert5To8(pixel & 0x1F), 255};
 }
@@ -93,8 +96,9 @@ inline const Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
-    const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes);
+inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
+    u16_le pixel;
+    std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
             Convert5To8((pixel >> 1) & 0x1F), Convert1To8(pixel & 0x1)};
 }
@@ -104,8 +108,9 @@ inline const Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
  * @param bytes Pointer to encoded source color
  * @return Result color decoded as Math::Vec4<u8>
  */
-inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
-    const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes);
+inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
+    u16_le pixel;
+    std::memcpy(&pixel, bytes, sizeof(pixel));
     return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
             Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF)};
 }
@@ -116,7 +121,9 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
  * @return Depth value as an u32
  */
 inline u32 DecodeD16(const u8* bytes) {
-    return *reinterpret_cast<const u16_le*>(bytes);
+    u16_le data;
+    std::memcpy(&data, bytes, sizeof(data));
+    return data;
 }
 
 /**
@@ -133,7 +140,7 @@ inline u32 DecodeD24(const u8* bytes) {
  * @param bytes Pointer to encoded source values
  * @return Resulting values stored as a Math::Vec2
  */
-inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
     return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
 }
 
@@ -175,8 +182,10 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
  * @param bytes Destination pointer to store encoded color
  */
 inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
-    *reinterpret_cast<u16_le*>(bytes) =
+    const u16_le data =
         (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
+
+    std::memcpy(bytes, &data, sizeof(data));
 }
 
 /**
@@ -185,9 +194,10 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
  * @param bytes Destination pointer to store encoded color
  */
 inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
-    *reinterpret_cast<u16_le*>(bytes) = (Convert8To5(color.r()) << 11) |
-                                        (Convert8To5(color.g()) << 6) |
-                                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
+    const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
+                        (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
+
+    std::memcpy(bytes, &data, sizeof(data));
 }
 
 /**
@@ -196,9 +206,10 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
  * @param bytes Destination pointer to store encoded color
  */
 inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
-    *reinterpret_cast<u16_le*>(bytes) = (Convert8To4(color.r()) << 12) |
-                                        (Convert8To4(color.g()) << 8) |
-                                        (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
+    const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
+                     (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
+
+    std::memcpy(bytes, &data, sizeof(data));
 }
 
 /**
@@ -207,7 +218,8 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
  * @param bytes Pointer where to store the encoded value
  */
 inline void EncodeD16(u32 value, u8* bytes) {
-    *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
+    const u16_le data = static_cast<u16>(value);
+    std::memcpy(bytes, &data, sizeof(data));
 }
 
 /**
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 5c94fcda3..8feb49941 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -78,7 +78,7 @@ public:
     }
 
     template <typename U = T>
-    constexpr Vec2<std::enable_if_t<std::is_signed<U>::value, U>> operator-() const {
+    constexpr Vec2<std::enable_if_t<std::is_signed_v<U>, U>> operator-() const {
         return {-x, -y};
     }
     constexpr Vec2<decltype(T{} * T{})> operator*(const Vec2& other) const {
@@ -227,7 +227,7 @@ public:
     }
 
     template <typename U = T>
-    constexpr Vec3<std::enable_if_t<std::is_signed<U>::value, U>> operator-() const {
+    constexpr Vec3<std::enable_if_t<std::is_signed_v<U>, U>> operator-() const {
         return {-x, -y, -z};
     }
 
@@ -436,7 +436,7 @@ public:
     }
 
     template <typename U = T>
-    constexpr Vec4<std::enable_if_t<std::is_signed<U>::value, U>> operator-() const {
+    constexpr Vec4<std::enable_if_t<std::is_signed_v<U>, U>> operator-() const {
         return {-x, -y, -z, -w};
     }
 
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index e952b0518..f3c5b1b9c 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -42,7 +42,7 @@ public:
             {0, &IProfile::Get, "Get"},
             {1, &IProfile::GetBase, "GetBase"},
             {10, nullptr, "GetImageSize"},
-            {11, nullptr, "LoadImage"},
+            {11, &IProfile::LoadImage, "LoadImage"},
         };
         RegisterHandlers(functions);
     }
@@ -83,6 +83,27 @@ private:
         rb.PushRaw(profile_base);
     }
 
+    void LoadImage(Kernel::HLERequestContext& ctx) {
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        // smallest jpeg https://github.com/mathiasbynens/small/blob/master/jpeg.jpg
+        // TODO(mailwl): load actual profile image from disk, width 256px, max size 0x20000
+        const u32 jpeg_size = 107;
+        static const std::array<u8, jpeg_size> jpeg{
+            0xff, 0xd8, 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03,
+            0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x06, 0x04, 0x04, 0x04, 0x04, 0x04,
+            0x08, 0x06, 0x06, 0x05, 0x06, 0x09, 0x08, 0x0a, 0x0a, 0x09, 0x08, 0x09, 0x09, 0x0a,
+            0x0c, 0x0f, 0x0c, 0x0a, 0x0b, 0x0e, 0x0b, 0x09, 0x09, 0x0d, 0x11, 0x0d, 0x0e, 0x0f,
+            0x10, 0x10, 0x11, 0x10, 0x0a, 0x0c, 0x12, 0x13, 0x12, 0x10, 0x13, 0x0f, 0x10, 0x10,
+            0x10, 0xff, 0xc9, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x11, 0x00,
+            0xff, 0xcc, 0x00, 0x06, 0x00, 0x10, 0x10, 0x05, 0xff, 0xda, 0x00, 0x08, 0x01, 0x01,
+            0x00, 0x00, 0x3f, 0x00, 0xd2, 0xcf, 0x20, 0xff, 0xd9,
+        };
+        ctx.WriteBuffer(jpeg.data(), jpeg_size);
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(jpeg_size);
+    }
+
     u128 user_id; ///< The user id this profile refers to.
 };
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 116dabedb..4cdf7f613 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -147,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     }
     params.fence_out.id = 0;
     params.fence_out.value = 0;
-    std::memcpy(output.data(), &params, output.size());
+    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
     return 0;
 }
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5c0ae8009..ed22a2090 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -238,6 +238,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 
     auto& buffer = shader.const_buffers[bind_data.index];
 
+    ASSERT(bind_data.index < Regs::MaxConstBuffers);
+
     buffer.enabled = bind_data.valid.Value() != 0;
     buffer.index = bind_data.index;
     buffer.address = regs.const_buffer.BufferAddress();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4d0ff96a5..0506ac8fe 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -44,7 +44,7 @@ public:
         static constexpr size_t MaxShaderProgram = 6;
         static constexpr size_t MaxShaderStage = 5;
         // Maximum number of const buffers per shader stage.
-        static constexpr size_t MaxConstBuffers = 16;
+        static constexpr size_t MaxConstBuffers = 18;
 
         enum class QueryMode : u32 {
             Write = 0,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7e3fb4b1..3d4557b7e 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,6 +78,8 @@ union Attribute {
         // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
         // shader.
         TessCoordInstanceIDVertexID = 47,
+        // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+        Unknown_63 = 63,
     };
 
     union {
@@ -254,20 +256,15 @@ union Instruction {
             BitField<56, 1, u64> invert_b;
         } lop32i;
 
-        float GetImm20_19() const {
-            float result{};
+        u32 GetImm20_19() const {
             u32 imm{static_cast<u32>(imm20_19)};
             imm <<= 12;
             imm |= negate_imm ? 0x80000000 : 0;
-            std::memcpy(&result, &imm, sizeof(imm));
-            return result;
+            return imm;
         }
 
-        float GetImm20_32() const {
-            float result{};
-            s32 imm{static_cast<s32>(imm20_32)};
-            std::memcpy(&result, &imm, sizeof(imm));
-            return result;
+        u32 GetImm20_32() const {
+            return static_cast<u32>(imm20_32);
         }
 
         s32 GetSignedImm20_20() const {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 440505c9d..874eddd78 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -34,6 +34,7 @@ enum class RenderTargetFormat : u32 {
     RG16_FLOAT = 0xDE,
     R11G11B10_FLOAT = 0xE0,
     R32_FLOAT = 0xE5,
+    B5G6R5_UNORM = 0xE8,
     R16_FLOAT = 0xF2,
     R8_UNORM = 0xF3,
 };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 039e9e0ca..546e86532 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -664,7 +664,10 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
         auto& buffer_draw_state =
             state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
 
-        ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer");
+        if (!buffer.enabled) {
+            continue;
+        }
+
         buffer_draw_state.enabled = true;
         buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 257aa9571..2e68dab11 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -109,6 +109,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true},                                                                                 // DXT45
     {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
+    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true},   // DXN2
     {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true},                                                                    // BC7U
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_4X4
@@ -218,17 +219,17 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
         MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,
         MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,
-        MortonCopy<true, PixelFormat::BC7U>,         MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
-        MortonCopy<true, PixelFormat::G8R8>,         MortonCopy<true, PixelFormat::BGRA8>,
-        MortonCopy<true, PixelFormat::RGBA32F>,      MortonCopy<true, PixelFormat::RG32F>,
-        MortonCopy<true, PixelFormat::R32F>,         MortonCopy<true, PixelFormat::R16F>,
-        MortonCopy<true, PixelFormat::R16UNORM>,     MortonCopy<true, PixelFormat::RG16>,
-        MortonCopy<true, PixelFormat::RG16F>,        MortonCopy<true, PixelFormat::RG16UI>,
-        MortonCopy<true, PixelFormat::RG16I>,        MortonCopy<true, PixelFormat::RG16S>,
-        MortonCopy<true, PixelFormat::RGB32F>,       MortonCopy<true, PixelFormat::SRGBA8>,
-        MortonCopy<true, PixelFormat::Z24S8>,        MortonCopy<true, PixelFormat::S8Z24>,
-        MortonCopy<true, PixelFormat::Z32F>,         MortonCopy<true, PixelFormat::Z16>,
-        MortonCopy<true, PixelFormat::Z32FS8>,
+        MortonCopy<true, PixelFormat::DXN2>,         MortonCopy<true, PixelFormat::BC7U>,
+        MortonCopy<true, PixelFormat::ASTC_2D_4X4>,  MortonCopy<true, PixelFormat::G8R8>,
+        MortonCopy<true, PixelFormat::BGRA8>,        MortonCopy<true, PixelFormat::RGBA32F>,
+        MortonCopy<true, PixelFormat::RG32F>,        MortonCopy<true, PixelFormat::R32F>,
+        MortonCopy<true, PixelFormat::R16F>,         MortonCopy<true, PixelFormat::R16UNORM>,
+        MortonCopy<true, PixelFormat::RG16>,         MortonCopy<true, PixelFormat::RG16F>,
+        MortonCopy<true, PixelFormat::RG16UI>,       MortonCopy<true, PixelFormat::RG16I>,
+        MortonCopy<true, PixelFormat::RG16S>,        MortonCopy<true, PixelFormat::RGB32F>,
+        MortonCopy<true, PixelFormat::SRGBA8>,       MortonCopy<true, PixelFormat::Z24S8>,
+        MortonCopy<true, PixelFormat::S8Z24>,        MortonCopy<true, PixelFormat::Z32F>,
+        MortonCopy<true, PixelFormat::Z16>,          MortonCopy<true, PixelFormat::Z32FS8>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -242,7 +243,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<false, PixelFormat::RGBA16F>,
         MortonCopy<false, PixelFormat::R11FG11FB10F>,
         MortonCopy<false, PixelFormat::RGBA32UI>,
-        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported
+        // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not
+        // supported
         nullptr,
         nullptr,
         nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0c6652c7a..6f01b2bf0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -35,31 +35,32 @@ struct SurfaceParams {
         DXT23 = 9,
         DXT45 = 10,
         DXN1 = 11, // This is also known as BC4
-        BC7U = 12,
-        ASTC_2D_4X4 = 13,
-        G8R8 = 14,
-        BGRA8 = 15,
-        RGBA32F = 16,
-        RG32F = 17,
-        R32F = 18,
-        R16F = 19,
-        R16UNORM = 20,
-        RG16 = 21,
-        RG16F = 22,
-        RG16UI = 23,
-        RG16I = 24,
-        RG16S = 25,
-        RGB32F = 26,
-        SRGBA8 = 27,
+        DXN2 = 12, // This is also known as BC5
+        BC7U = 13,
+        ASTC_2D_4X4 = 14,
+        G8R8 = 15,
+        BGRA8 = 16,
+        RGBA32F = 17,
+        RG32F = 18,
+        R32F = 19,
+        R16F = 20,
+        R16UNORM = 21,
+        RG16 = 22,
+        RG16F = 23,
+        RG16UI = 24,
+        RG16I = 25,
+        RG16S = 26,
+        RGB32F = 27,
+        SRGBA8 = 28,
 
         MaxColorFormat,
 
         // DepthStencil formats
-        Z24S8 = 28,
-        S8Z24 = 29,
-        Z32F = 30,
-        Z16 = 31,
-        Z32FS8 = 32,
+        Z24S8 = 29,
+        S8Z24 = 30,
+        Z32F = 31,
+        Z16 = 32,
+        Z32FS8 = 33,
 
         MaxDepthStencilFormat,
 
@@ -109,6 +110,7 @@ struct SurfaceParams {
             4, // DXT23
             4, // DXT45
             4, // DXN1
+            4, // DXN2
             4, // BC7U
             4, // ASTC_2D_4X4
             1, // G8R8
@@ -153,6 +155,7 @@ struct SurfaceParams {
             128, // DXT23
             128, // DXT45
             64,  // DXN1
+            128, // DXN2
             128, // BC7U
             32,  // ASTC_2D_4X4
             16,  // G8R8
@@ -221,6 +224,8 @@ struct SurfaceParams {
             return PixelFormat::RG32F;
         case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
             return PixelFormat::R11FG11FB10F;
+        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+            return PixelFormat::B5G6R5;
         case Tegra::RenderTargetFormat::RGBA32_UINT:
             return PixelFormat::RGBA32UI;
         case Tegra::RenderTargetFormat::R8_UNORM:
@@ -303,6 +308,8 @@ struct SurfaceParams {
             return PixelFormat::DXT45;
         case Tegra::Texture::TextureFormat::DXN1:
             return PixelFormat::DXN1;
+        case Tegra::Texture::TextureFormat::DXN2:
+            return PixelFormat::DXN2;
         case Tegra::Texture::TextureFormat::BC7U:
             return PixelFormat::BC7U;
         case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
@@ -360,6 +367,8 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::DXT45;
         case PixelFormat::DXN1:
             return Tegra::Texture::TextureFormat::DXN1;
+        case PixelFormat::DXN2:
+            return Tegra::Texture::TextureFormat::DXN2;
         case PixelFormat::BC7U:
             return Tegra::Texture::TextureFormat::BC7U;
         case PixelFormat::ASTC_2D_4X4:
@@ -441,6 +450,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
         case Tegra::RenderTargetFormat::R8_UNORM:
         case Tegra::RenderTargetFormat::RG16_UNORM:
+        case Tegra::RenderTargetFormat::B5G6R5_UNORM:
             return ComponentType::UNorm;
         case Tegra::RenderTargetFormat::RG16_SNORM:
             return ComponentType::SNorm;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e3217db81..dd240a4ce 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -523,6 +523,11 @@ private:
             // shader.
             ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
             return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
+        case Attribute::Index::Unknown_63:
+            // TODO(bunnei): Figure out what this is used for. Super Mario Odyssey uses this.
+            LOG_CRITICAL(HW_GPU, "Unhandled input attribute Unknown_63");
+            UNREACHABLE();
+            break;
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -534,6 +539,8 @@ private:
             LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
             UNREACHABLE();
         }
+
+        return "vec4(0, 0, 0, 0)";
     }
 
     /// Generates code representing an output attribute register.
@@ -602,12 +609,12 @@ private:
 
     /// Generates code representing a 19-bit immediate value
     static std::string GetImmediate19(const Instruction& instr) {
-        return std::to_string(instr.alu.GetImm20_19());
+        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19());
     }
 
     /// Generates code representing a 32-bit immediate value
     static std::string GetImmediate32(const Instruction& instr) {
-        return std::to_string(instr.alu.GetImm20_32());
+        return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
     }
 
     /// Generates code representing a texture sampler.
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24b1d956b..5c7b636e4 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -7,6 +7,10 @@
 #include <array>
 #include <glad/glad.h>
 
+#include "video_core/engines/maxwell_3d.h"
+
+using Regs = Tegra::Engines::Maxwell3D::Regs;
+
 namespace TextureUnits {
 
 struct TextureUnit {
@@ -120,7 +124,7 @@ public:
             GLuint bindpoint;
             GLuint ssbo;
         };
-        std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{};
+        std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
     } draw;
 
     struct {
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 65db84ad3..7ea66584c 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) {
         return 8;
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
+    case TextureFormat::DXN2:
     case TextureFormat::BC7U:
         // In this case a 'pixel' actually refers to a 4x4 tile.
         return 16;
@@ -113,6 +114,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
     case TextureFormat::DXN1:
+    case TextureFormat::DXN2:
     case TextureFormat::BC7U:
         // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
         // values.
@@ -179,6 +181,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
     case TextureFormat::DXN1:
+    case TextureFormat::DXN2:
     case TextureFormat::BC7U:
     case TextureFormat::ASTC_2D_4X4:
     case TextureFormat::A8R8G8B8: