42 files changed, 668 insertions, 280 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 525b2363c..07e75f9d8 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate_program.h
     host_translate_info.h
     ir_opt/collect_shader_info_pass.cpp
+    ir_opt/conditional_barrier_pass.cpp
     ir_opt/constant_propagation_pass.cpp
     ir_opt/dead_code_elimination_pass.cpp
     ir_opt/dual_vertex_pass.cpp
@@ -223,6 +224,7 @@ add_library(shader_recompiler STATIC
     ir_opt/identity_removal_pass.cpp
     ir_opt/layer_pass.cpp
     ir_opt/lower_fp16_to_fp32.cpp
+    ir_opt/lower_fp64_to_fp32.cpp
     ir_opt/lower_int64_to_int32.cpp
     ir_opt/passes.h
     ir_opt/position_pass.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
index 0cb1e193e..b795c0179 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -279,6 +279,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile,
     header += "OPTION NV_internal;"
               "OPTION NV_shader_storage_buffer;"
               "OPTION NV_gpu_program_fp64;";
+    // TODO: Enable this option only when multisample (MS) textures are actually used
+    header += "OPTION NV_texture_multisample;";
     if (info.uses_int64_bit_atomics) {
         header += "OPTION NV_shader_atomic_int64;";
     }
@@ -459,7 +461,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I
         header += fmt::format("R{},", index);
     }
     if (program.local_memory_size > 0) {
-        header += fmt::format("lmem[{}],", program.local_memory_size);
+        header += fmt::format("lmem[{}],", Common::DivCeil(program.local_memory_size, 4U));
     }
     if (program.info.uses_fswzadd) {
         header += "FSWZA[4],FSWZB[4],";
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
index 5bfdecc09..2fc2a0ac6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -43,10 +43,6 @@ void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
     Alias(inst, value);
 }
 
-void EmitBitCastS32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
-    Alias(inst, value);
-}
-
 void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
     Alias(inst, value);
 }
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index e67e80fac..85ee27333 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -59,7 +59,14 @@ std::string Image(EmitContext& ctx, IR::TextureInstInfo info,
     }
 }
 
-std::string_view TextureType(IR::TextureInstInfo info) {
+bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) {
+    if (info.type == TextureType::Buffer) {
+        return false;
+    }
+    return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample;
+}
+
+std::string_view TextureType(IR::TextureInstInfo info, bool is_ms = false) {
     if (info.is_depth) {
         switch (info.type) {
         case TextureType::Color1D:
@@ -88,9 +95,9 @@ std::string_view TextureType(IR::TextureInstInfo info) {
             return "ARRAY1D";
         case TextureType::Color2D:
         case TextureType::Color2DRect:
-            return "2D";
+            return is_ms ? "2DMS" : "2D";
         case TextureType::ColorArray2D:
-            return "ARRAY2D";
+            return is_ms ? "ARRAY2DMS" : "ARRAY2D";
         case TextureType::Color3D:
             return "3D";
         case TextureType::ColorCube:
@@ -510,15 +517,16 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const auto sparse_inst{PrepareSparse(inst)};
+    const bool is_multisample{ms.type != Type::Void};
     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
-    const std::string_view type{TextureType(info)};
+    const std::string_view type{TextureType(info, is_multisample)};
     const std::string texture{Texture(ctx, info, index)};
     const std::string offset_vec{Offset(ctx, offset)};
     const auto [coord_vec, coord_alloc]{Coord(ctx, coord)};
     const Register ret{ctx.reg_alloc.Define(inst)};
     if (info.type == TextureType::Buffer) {
         ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec);
-    } else if (ms.type != Type::Void) {
+    } else if (is_multisample) {
         ctx.Add("MOV.S {}.w,{};"
                 "TXFMS.F{} {},{},{},{}{};",
                 coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec);
@@ -531,10 +539,11 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 }
 
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              ScalarS32 lod) {
+                              ScalarS32 lod, [[maybe_unused]] const IR::Value& skip_mips) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const std::string texture{Texture(ctx, info, index)};
-    const std::string_view type{TextureType(info)};
+    const bool is_msaa{IsTextureMsaa(ctx, info)};
+    const std::string_view type{TextureType(info, is_msaa)};
     ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type);
 }
 
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index eaaf9ba39..1a1ea61d5 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -197,7 +197,6 @@ void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Regist
 void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
 void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value);
@@ -582,7 +581,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              ScalarS32 lod);
+                              ScalarS32 lod, const IR::Value& skip_mips);
 void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord);
 void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& coord, const IR::Value& derivatives,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
index 911181c43..376a05827 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp
@@ -398,162 +398,162 @@ void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value
 }
 
 void EmitGlobalAtomicIAdd32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMin32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMin32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMax32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMax32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicInc32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicDec32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAnd32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicOr32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicXor32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicExchange32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicIAdd64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMin64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMin64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMax64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMax64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicInc64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicDec64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAnd64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicOr64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicXor64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicExchange64(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicIAdd32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMin32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMin32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicSMax32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicUMax32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicInc32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicDec32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAnd32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicOr32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicXor32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicExchange32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAddF32(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAddF16x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicAddF32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicMinF16x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicMinF32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicMaxF16x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 
 void EmitGlobalAtomicMaxF32x2(EmitContext&) {
-    throw NotImplementedException("GLSL Instrucion");
+    throw NotImplementedException("GLSL Instruction");
 }
 } // namespace Shader::Backend::GLSL
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 8e5e6cf1f..1be4a0f59 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -48,10 +48,6 @@ void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value)
     ctx.AddU64("{}=doubleBitsToUint64({});", inst, value);
 }
 
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF32("{}=ftoi({});", inst, value);
-}
-
 void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) {
     NotImplemented();
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index cecdbb9d6..418505475 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -25,6 +25,13 @@ std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::V
     return fmt::format("img{}{}", def.binding, index_offset);
 }
 
+bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) {
+    if (info.type == TextureType::Buffer) {
+        return false;
+    }
+    return ctx.info.texture_descriptors.at(info.descriptor_index).is_multisample;
+}
+
 std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) {
     switch (info.type) {
     case TextureType::Color1D:
@@ -136,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) {
     }
     return sparse_inst;
 }
+
+std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture,
+                                      std::string_view coords) {
+    switch (info.type) {
+    case TextureType::Color2D:
+    case TextureType::Color2DRect:
+        return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture);
+    case TextureType::ColorArray2D:
+    case TextureType::ColorCube:
+        return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords,
+                           texture);
+    default:
+        return std::string{coords};
+    }
+}
 } // Anonymous namespace
 
 void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
@@ -333,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
         LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
         ctx.AddU1("{}=true;", *sparse_inst);
     }
+    std::string coords_with_subpixel_offset;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        // Apply a subpixel offset of 1/512 of the texture's texel size to ensure the same
+        // rounding on AMD hardware as on Maxwell or other Nvidia architectures.
+        coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
+        coords = coords_with_subpixel_offset;
+    }
     if (!sparse_inst || !supports_sparse) {
         if (offset.IsEmpty()) {
             ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords,
@@ -380,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
         LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING");
         ctx.AddU1("{}=true;", *sparse_inst);
     }
+    std::string coords_with_subpixel_offset;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        // Apply a subpixel offset of 1/512 of the texture's texel size to ensure the same
+        // rounding on AMD hardware as on Maxwell or other Nvidia architectures.
+        coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords);
+        coords = coords_with_subpixel_offset;
+    }
     if (!sparse_inst || !supports_sparse) {
         if (offset.IsEmpty()) {
             ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref);
@@ -414,7 +450,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde
 
 void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     std::string_view coords, std::string_view offset, std::string_view lod,
-                    [[maybe_unused]] std::string_view ms) {
+                    std::string_view ms) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     if (info.has_bias) {
         throw NotImplementedException("EmitImageFetch Bias texture samples");
@@ -431,19 +467,24 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
         ctx.AddU1("{}=true;", *sparse_inst);
     }
     if (!sparse_inst || !supports_sparse) {
-        if (!offset.empty()) {
-            ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture,
-                    CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info));
+        const auto int_coords{CoordsCastToInt(coords, info)};
+        if (!ms.empty()) {
+            ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, ms);
+        } else if (!offset.empty()) {
+            ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, int_coords, lod,
+                    CoordsCastToInt(offset, info));
         } else {
             if (info.type == TextureType::Buffer) {
                 ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords);
             } else {
-                ctx.Add("{}=texelFetch({},{},int({}));", texel, texture,
-                        CoordsCastToInt(coords, info), lod);
+                ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, int_coords, lod);
             }
         }
         return;
     }
+    if (!ms.empty()) {
+        throw NotImplementedException("EmitImageFetch Sparse MSAA samples");
+    }
     if (!offset.empty()) {
         ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));",
                   *sparse_inst, texture, CastToIntVec(coords, info), lod,
@@ -455,29 +496,36 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
 }
 
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              std::string_view lod) {
+                              std::string_view lod, const IR::Value& skip_mips_val) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     const auto texture{Texture(ctx, info, index)};
+    const bool is_msaa{IsTextureMsaa(ctx, info)};
+    const bool skip_mips{skip_mips_val.U1()};
+    const auto mips{skip_mips ? "0u" : fmt::format("uint(textureQueryLevels({}))", texture)};
+    if (is_msaa && !skip_mips) {
+        throw NotImplementedException("EmitImageQueryDimensions MSAA QueryLevels");
+    }
+    if (info.type == TextureType::Buffer && !skip_mips) {
+        throw NotImplementedException("EmitImageQueryDimensions TextureType::Buffer QueryLevels");
+    }
+    const bool uses_lod{!is_msaa && info.type != TextureType::Buffer};
+    const auto lod_str{uses_lod ? fmt::format(",int({})", lod) : ""};
     switch (info.type) {
     case TextureType::Color1D:
-        return ctx.AddU32x4(
-            "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst,
-            texture, lod, texture);
+        return ctx.AddU32x4("{}=uvec4(uint(textureSize({}{})),0u,0u,{});", inst, texture, lod_str,
+                            mips);
     case TextureType::ColorArray1D:
     case TextureType::Color2D:
     case TextureType::ColorCube:
     case TextureType::Color2DRect:
-        return ctx.AddU32x4(
-            "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst,
-            texture, lod, texture);
+        return ctx.AddU32x4("{}=uvec4(uvec2(textureSize({}{})),0u,{});", inst, texture, lod_str,
+                            mips);
     case TextureType::ColorArray2D:
     case TextureType::Color3D:
     case TextureType::ColorArrayCube:
-        return ctx.AddU32x4(
-            "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture,
-            lod, texture);
+        return ctx.AddU32x4("{}=uvec4(uvec3(textureSize({}{})),{});", inst, texture, lod_str, mips);
     case TextureType::Buffer:
-        throw NotImplementedException("EmitImageQueryDimensions Texture buffers");
+        return ctx.AddU32x4("{}=uvec4(uint(textureSize({})),0u,0u,{});", inst, texture, mips);
     }
     throw LogicError("Unspecified image type {}", info.type.Value());
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index 4151c89de..8d0a65047 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -231,7 +231,6 @@ void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond,
 void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst);
 void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
 void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
-void EmitBitCastS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
 void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst);
 void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value);
 void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value);
@@ -655,7 +654,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                     std::string_view coords, std::string_view offset, std::string_view lod,
                     std::string_view ms);
 void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
-                              std::string_view lod);
+                              std::string_view lod, const IR::Value& skip_mips);
 void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        std::string_view coords);
 void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 5d01ec0cd..9ff4028c2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -61,24 +61,28 @@ std::string OutputDecorator(Stage stage, u32 size) {
     }
 }
 
-std::string_view SamplerType(TextureType type, bool is_depth) {
-    if (is_depth) {
-        switch (type) {
-        case TextureType::Color1D:
-            return "sampler1DShadow";
-        case TextureType::ColorArray1D:
-            return "sampler1DArrayShadow";
-        case TextureType::Color2D:
-            return "sampler2DShadow";
-        case TextureType::ColorArray2D:
-            return "sampler2DArrayShadow";
-        case TextureType::ColorCube:
-            return "samplerCubeShadow";
-        case TextureType::ColorArrayCube:
-            return "samplerCubeArrayShadow";
-        default:
-            throw NotImplementedException("Texture type: {}", type);
-        }
+std::string_view DepthSamplerType(TextureType type) {
+    switch (type) {
+    case TextureType::Color1D:
+        return "sampler1DShadow";
+    case TextureType::ColorArray1D:
+        return "sampler1DArrayShadow";
+    case TextureType::Color2D:
+        return "sampler2DShadow";
+    case TextureType::ColorArray2D:
+        return "sampler2DArrayShadow";
+    case TextureType::ColorCube:
+        return "samplerCubeShadow";
+    case TextureType::ColorArrayCube:
+        return "samplerCubeArrayShadow";
+    default:
+        throw NotImplementedException("Texture type: {}", type);
+    }
+}
+
+std::string_view ColorSamplerType(TextureType type, bool is_multisample = false) {
+    if (is_multisample) {
+        ASSERT(type == TextureType::Color2D || type == TextureType::ColorArray2D);
     }
     switch (type) {
     case TextureType::Color1D:
@@ -87,9 +91,9 @@ std::string_view SamplerType(TextureType type, bool is_depth) {
         return "sampler1DArray";
     case TextureType::Color2D:
     case TextureType::Color2DRect:
-        return "sampler2D";
+        return is_multisample ? "sampler2DMS" : "sampler2D";
     case TextureType::ColorArray2D:
-        return "sampler2DArray";
+        return is_multisample ? "sampler2DMSArray" : "sampler2DArray";
     case TextureType::Color3D:
         return "sampler3D";
     case TextureType::ColorCube:
@@ -306,12 +310,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
         if (runtime_info.force_early_z) {
             header += "layout(early_fragment_tests)in;";
         }
-        if (info.uses_sample_id) {
-            header += "in int gl_SampleID;";
-        }
-        if (info.stores_sample_mask) {
-            header += "out int gl_SampleMask[];";
-        }
         break;
     case Stage::Compute:
         stage_name = "cs";
@@ -481,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
         const u32 remainder{4 - element};
         const TransformFeedbackVarying* xfb_varying{};
         const size_t xfb_varying_index{base_index + element};
-        if (xfb_varying_index < runtime_info.xfb_varyings.size()) {
+        if (xfb_varying_index < runtime_info.xfb_count) {
             xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
             xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
         }
@@ -677,7 +675,7 @@ void EmitContext::SetupTextures(Bindings& bindings) {
     texture_buffers.reserve(info.texture_buffer_descriptors.size());
     for (const auto& desc : info.texture_buffer_descriptors) {
         texture_buffers.push_back({bindings.texture, desc.count});
-        const auto sampler_type{SamplerType(TextureType::Buffer, false)};
+        const auto sampler_type{ColorSamplerType(TextureType::Buffer)};
         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
         header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
                               sampler_type, bindings.texture, array_decorator);
@@ -686,7 +684,8 @@ void EmitContext::SetupTextures(Bindings& bindings) {
     textures.reserve(info.texture_descriptors.size());
     for (const auto& desc : info.texture_descriptors) {
         textures.push_back({bindings.texture, desc.count});
-        const auto sampler_type{SamplerType(desc.type, desc.is_depth)};
+        const auto sampler_type{desc.is_depth ? DepthSamplerType(desc.type)
+                                              : ColorSamplerType(desc.type, desc.is_multisample)};
         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
         header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture,
                               sampler_type, bindings.texture, array_decorator);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
index dfd10ac28..7587f7bab 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
@@ -49,7 +49,7 @@ public:
     void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
         const auto var_def{var_alloc.AddDefine(inst, type)};
         if (var_def.empty()) {
-            // skip assigment.
+            // skip assignment.
             code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...);
         } else {
             code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0f86a8004..34592a01f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
 }
 
 void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
-    if (ctx.runtime_info.xfb_varyings.empty()) {
+    if (ctx.runtime_info.xfb_count == 0) {
         return;
     }
     ctx.AddCapability(spv::Capability::TransformFeedback);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 4b3043b65..0ce73f289 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -69,6 +69,11 @@ Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value&
 Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
                     Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id),
                     Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    if (!ctx.profile.support_descriptor_aliasing) {
+        LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+        return ctx.ConstantNull(ctx.U64);
+    }
+
     if (ctx.profile.support_int64_atomics) {
         const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64,
                                         binding, offset, sizeof(u64))};
@@ -86,6 +91,11 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value&
 
 Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value,
                       Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) {
+    if (!ctx.profile.support_descriptor_aliasing) {
+        LOG_WARNING(Shader_SPIRV, "Descriptor aliasing not supported, this cannot be atomic.");
+        return ctx.ConstantNull(ctx.U32[2]);
+    }
+
     LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic");
     const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2,
                                     binding, offset, sizeof(u32[2]))};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 50daacd95..c4ca28d11 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -18,10 +18,6 @@ void EmitBitCastU64F64(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
 
-void EmitBitCastS32F32(EmitContext&) {
-    throw NotImplementedException("SPIR-V Instruction");
-}
-
 void EmitBitCastF16U16(EmitContext&) {
     throw NotImplementedException("SPIR-V Instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 0cd87a48f..2868fc57d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -10,27 +10,6 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
-struct AttrInfo {
-    Id pointer;
-    Id id;
-    bool needs_cast;
-};
-
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
-    const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
-    switch (type) {
-    case AttributeType::Float:
-        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
-    case AttributeType::UnsignedInt:
-        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
-    case AttributeType::SignedInt:
-        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
-    case AttributeType::Disabled:
-        return std::nullopt;
-    }
-    throw InvalidArgument("Invalid attribute type {}", type);
-}
-
 template <typename... Args>
 Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) {
     switch (ctx.stage) {
@@ -302,15 +281,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     const u32 element{static_cast<u32>(attr) % 4};
     if (IR::IsGeneric(attr)) {
         const u32 index{IR::GenericAttributeIndex(attr)};
-        const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
-        if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
+        const auto& generic{ctx.input_generics.at(index)};
+        if (!ValidId(generic.id)) {
             // Attribute is disabled or varying component is not written
             return ctx.Const(element == 3 ? 1.0f : 0.0f);
         }
-        const Id generic_id{ctx.input_generics.at(index)};
-        const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
-        const Id value{ctx.OpLoad(type->id, pointer)};
-        return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
+        const Id pointer{
+            AttrPointer(ctx, generic.pointer_type, vertex, generic.id, ctx.Const(element))};
+        const Id value{ctx.OpLoad(generic.component_type, pointer)};
+        return [&ctx, generic, value]() {
+            switch (generic.load_op) {
+            case InputGenericLoadOp::Bitcast:
+                return ctx.OpBitcast(ctx.F32[1], value);
+            case InputGenericLoadOp::SToF:
+                return ctx.OpConvertSToF(ctx.F32[1], value);
+            case InputGenericLoadOp::UToF:
+                return ctx.OpConvertUToF(ctx.F32[1], value);
+            default:
+                return value;
+            };
+        }();
     }
     switch (attr) {
     case IR::Attribute::PrimitiveId:
@@ -339,9 +329,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
         if (ctx.profile.support_vertex_instance_id) {
             return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id));
         } else {
-            const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
-            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
-            return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
+            return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index));
         }
     case IR::Attribute::BaseInstance:
         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
@@ -386,9 +374,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
         if (ctx.profile.support_vertex_instance_id) {
             return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
         } else {
-            const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
-            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
-            return ctx.OpISub(ctx.U32[1], index, base);
+            return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
         }
     case IR::Attribute::BaseInstance:
         return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
@@ -473,7 +459,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
 }
 
 void EmitSetSampleMask(EmitContext& ctx, Id value) {
-    ctx.OpStore(ctx.sample_mask, value);
+    const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
+    ctx.OpStore(pointer, value);
 }
 
 void EmitSetFragDepth(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index fb5799c42..7d901c04b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -201,6 +201,13 @@ Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) {
     }
 }
 
+bool IsTextureMsaa(EmitContext& ctx, const IR::TextureInstInfo& info) {
+    if (info.type == TextureType::Buffer) {
+        return false;
+    }
+    return ctx.textures.at(info.descriptor_index).is_multisample;
+}
+
 Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     if (info.relaxed_precision != 0) {
@@ -254,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) {
     const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))};
     return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value);
 }
+
+Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture,
+                             Id coords) {
+    // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on
+    // AMD hardware as on Maxwell or other Nvidia architectures.
+    const auto calculate_coords{[&](size_t dim) {
+        const Id nudge{ctx.Const(0x1p-9f)};
+        const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)};
+        Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge)
+                           : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)};
+        offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size));
+        return ctx.OpFAdd(ctx.F32[dim], coords, offset);
+    }};
+    switch (info.type) {
+    case TextureType::Color2D:
+    case TextureType::Color2DRect:
+        return calculate_coords(2);
+    case TextureType::ColorArray2D:
+    case TextureType::ColorCube:
+        return calculate_coords(3);
+    default:
+        return coords;
+    }
+}
 } // Anonymous namespace
 
 Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
@@ -416,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
                    const IR::Value& offset, const IR::Value& offset2) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const ImageOperands operands(ctx, offset, offset2);
+    if (ctx.profile.need_gather_subpixel_offset) {
+        coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
+    }
     return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
                 ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component),
                 operands.MaskOptional(), operands.Span());
@@ -425,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& offset2, Id dref) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const ImageOperands operands(ctx, offset, offset2);
+    if (ctx.profile.need_gather_subpixel_offset) {
+        coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
+    }
     return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
                 ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(),
                 operands.Span());
@@ -436,34 +473,42 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
     if (info.type == TextureType::Buffer) {
         lod = Id{};
     }
+    if (Sirit::ValidId(ms)) {
+        // This image is multisampled, lod must be implicit
+        lod = Id{};
+    }
     const ImageOperands operands(offset, lod, ms);
     return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
                 TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span());
 }
 
-Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) {
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
+                            const IR::Value& skip_mips_val) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const Id image{TextureImage(ctx, info, index)};
     const Id zero{ctx.u32_zero_value};
-    const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }};
+    const bool skip_mips{skip_mips_val.U1()};
+    const auto mips{[&] { return skip_mips ? zero : ctx.OpImageQueryLevels(ctx.U32[1], image); }};
+    const bool is_msaa{IsTextureMsaa(ctx, info)};
+    const bool uses_lod{!is_msaa && info.type != TextureType::Buffer};
+    const auto query{[&](Id type) {
+        return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
+                        : ctx.OpImageQuerySize(type, image);
+    }};
     switch (info.type) {
     case TextureType::Color1D:
-        return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod),
-                                        zero, zero, mips());
+        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
     case TextureType::ColorArray1D:
     case TextureType::Color2D:
     case TextureType::ColorCube:
     case TextureType::Color2DRect:
-        return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod),
-                                        zero, mips());
+        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
     case TextureType::ColorArray2D:
     case TextureType::Color3D:
     case TextureType::ColorArrayCube:
-        return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod),
-                                        mips());
+        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
     case TextureType::Buffer:
-        return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero,
-                                        zero, mips());
+        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
     }
     throw LogicError("Unspecified image type {}", info.type.Value());
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index e31cdc5e8..a440b557d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -179,7 +179,6 @@ Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 void EmitBitCastU16F16(EmitContext& ctx);
 Id EmitBitCastU32F32(EmitContext& ctx, Id value);
 void EmitBitCastU64F64(EmitContext& ctx);
-void EmitBitCastS32F32(EmitContext& ctx);
 void EmitBitCastF16U16(EmitContext&);
 Id EmitBitCastF32U32(EmitContext& ctx, Id value);
 void EmitBitCastF64U64(EmitContext& ctx);
@@ -540,7 +539,8 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& offset2, Id dref);
 Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
                   Id lod, Id ms);
-Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod);
+Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod,
+                            const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
                      Id derivates, Id offset, Id lod_clamp);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c5db19d09..77ff8c573 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) {
 Id WarpExtract(EmitContext& ctx, Id value) {
     const Id thread_id{GetThreadId(ctx)};
     const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
-    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+    if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) {
+        const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))};
+        const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))};
+        const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))};
+        const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)),
+                                     ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))};
+        const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)};
+        const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)};
+        const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)};
+        return c0_or_c1_or_c2_or_c3;
+    } else {
+        return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
+    }
 }
 
 Id LoadMask(EmitContext& ctx, Id mask) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index a0c155fdb..bec5db173 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -25,16 +25,11 @@ enum class Operation {
     FPMax,
 };
 
-struct AttrInfo {
-    Id pointer;
-    Id id;
-    bool needs_cast;
-};
-
 Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
     const spv::ImageFormat format{spv::ImageFormat::Unknown};
     const Id type{ctx.F32[1]};
     const bool depth{desc.is_depth};
+    const bool ms{desc.is_multisample};
     switch (desc.type) {
     case TextureType::Color1D:
         return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format);
@@ -42,9 +37,9 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
         return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format);
     case TextureType::Color2D:
     case TextureType::Color2DRect:
-        return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format);
+        return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, ms, 1, format);
     case TextureType::ColorArray2D:
-        return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format);
+        return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, ms, 1, format);
     case TextureType::Color3D:
         return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format);
     case TextureType::ColorCube:
@@ -165,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
         const u32 remainder{4 - element};
         const TransformFeedbackVarying* xfb_varying{};
         const size_t xfb_varying_index{base_attr_index + element};
-        if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) {
+        if (xfb_varying_index < ctx.runtime_info.xfb_count) {
             xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
             xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
         }
@@ -205,23 +200,37 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) {
         return ctx.TypeVector(ctx.TypeInt(32, true), 4);
     case AttributeType::UnsignedInt:
         return ctx.U32[4];
+    case AttributeType::SignedScaled:
+        return ctx.profile.support_scaled_attributes ? ctx.F32[4]
+                                                     : ctx.TypeVector(ctx.TypeInt(32, true), 4);
+    case AttributeType::UnsignedScaled:
+        return ctx.profile.support_scaled_attributes ? ctx.F32[4] : ctx.U32[4];
     case AttributeType::Disabled:
         break;
     }
     throw InvalidArgument("Invalid attribute type {}", type);
 }
 
-std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) {
-    const AttributeType type{ctx.runtime_info.generic_input_types.at(index)};
+InputGenericInfo GetAttributeInfo(EmitContext& ctx, AttributeType type, Id id) {
     switch (type) {
     case AttributeType::Float:
-        return AttrInfo{ctx.input_f32, ctx.F32[1], false};
+        return InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None};
     case AttributeType::UnsignedInt:
-        return AttrInfo{ctx.input_u32, ctx.U32[1], true};
+        return InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::Bitcast};
     case AttributeType::SignedInt:
-        return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true};
+        return InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+                                InputGenericLoadOp::Bitcast};
+    case AttributeType::SignedScaled:
+        return ctx.profile.support_scaled_attributes
+                   ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+                   : InputGenericInfo{id, ctx.input_s32, ctx.TypeInt(32, true),
+                                      InputGenericLoadOp::SToF};
+    case AttributeType::UnsignedScaled:
+        return ctx.profile.support_scaled_attributes
+                   ? InputGenericInfo{id, ctx.input_f32, ctx.F32[1], InputGenericLoadOp::None}
+                   : InputGenericInfo{id, ctx.input_u32, ctx.U32[1], InputGenericLoadOp::UToF};
     case AttributeType::Disabled:
-        return std::nullopt;
+        return InputGenericInfo{};
     }
     throw InvalidArgument("Invalid attribute type {}", type);
 }
@@ -745,18 +754,29 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
                 continue;
             }
             AddLabel(labels[label_index]);
-            const auto type{AttrTypes(*this, static_cast<u32>(index))};
-            if (!type) {
+            const auto& generic{input_generics.at(index)};
+            const Id generic_id{generic.id};
+            if (!ValidId(generic_id)) {
                 OpReturnValue(Const(0.0f));
                 ++label_index;
                 continue;
             }
-            const Id generic_id{input_generics.at(index)};
-            const Id pointer{is_array
-                                 ? OpAccessChain(type->pointer, generic_id, vertex, masked_index)
-                                 : OpAccessChain(type->pointer, generic_id, masked_index)};
-            const Id value{OpLoad(type->id, pointer)};
-            const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value};
+            const Id pointer{
+                is_array ? OpAccessChain(generic.pointer_type, generic_id, vertex, masked_index)
+                         : OpAccessChain(generic.pointer_type, generic_id, masked_index)};
+            const Id value{OpLoad(generic.component_type, pointer)};
+            const Id result{[this, generic, value]() {
+                switch (generic.load_op) {
+                case InputGenericLoadOp::Bitcast:
+                    return OpBitcast(F32[1], value);
+                case InputGenericLoadOp::SToF:
+                    return OpConvertSToF(F32[1], value);
+                case InputGenericLoadOp::UToF:
+                    return OpConvertUToF(F32[1], value);
+                default:
+                    return value;
+                };
+            }()};
             OpReturnValue(result);
             ++label_index;
         }
@@ -1287,6 +1307,7 @@ void EmitContext::DefineTextures(const Info& info, u32& binding, u32& scaling_in
             .pointer_type = pointer_type,
             .image_type = image_type,
             .count = desc.count,
+            .is_multisample = desc.is_multisample,
         });
         if (profile.supported_spirv >= 0x00010400) {
             interfaces.push_back(id);
@@ -1455,7 +1476,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         const Id id{DefineInput(*this, type, true)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
         Name(id, fmt::format("in_attr{}", index));
-        input_generics[index] = id;
+        input_generics[index] = GetAttributeInfo(*this, input_type, id);
 
         if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) {
             Decorate(id, spv::Decoration::PassthroughNV);
@@ -1570,7 +1591,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
             Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
         }
         if (info.stores_sample_mask) {
-            sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+            const Id array_type{TypeArray(U32[1], Const(1U))};
+            sample_mask = DefineOutput(*this, array_type, std::nullopt);
             Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
         }
         break;
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index dbc5c55b9..e63330f11 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -35,6 +35,7 @@ struct TextureDefinition {
     Id pointer_type;
     Id image_type;
     u32 count;
+    bool is_multisample;
 };
 
 struct TextureBufferDefinition {
@@ -94,6 +95,20 @@ struct StorageDefinitions {
     Id U32x4{};
 };
 
+enum class InputGenericLoadOp {
+    None,
+    Bitcast,
+    SToF,
+    UToF,
+};
+
+struct InputGenericInfo {
+    Id id;
+    Id pointer_type;
+    Id component_type;
+    InputGenericLoadOp load_op;
+};
+
 struct GenericElementInfo {
     Id id{};
     u32 first_element{};
@@ -282,7 +297,7 @@ public:
 
     bool need_input_position_indirect{};
     Id input_position{};
-    std::array<Id, 32> input_generics{};
+    std::array<InputGenericInfo, 32> input_generics{};
 
     Id output_point_size{};
     Id output_position{};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index eb2e49a68..b7caa4246 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -704,11 +704,6 @@ IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
 }
 
 template <>
-IR::S32 IREmitter::BitCast<IR::S32, IR::F32>(const IR::F32& value) {
-    return Inst<IR::S32>(Opcode::BitCastS32F32, value);
-}
-
-template <>
 IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
     return Inst<IR::F32>(Opcode::BitCastF32U32, value);
 }
@@ -1851,15 +1846,16 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu
     return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
 }
 
-Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
+Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
+                                     const IR::U1& skip_mips) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
                                          : Opcode::BindlessImageQueryDimensions};
-    return Inst(op, handle, lod);
+    return Inst(op, handle, lod, skip_mips);
 }
 
 Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod,
-                                     TextureInstInfo info) {
-    return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod);
+                                     const IR::U1& skip_mips, TextureInstInfo info) {
+    return Inst(Opcode::ImageQueryDimensions, Flags{info}, handle, lod, skip_mips);
 }
 
 Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 7aaaa4ab0..f3c81dbe1 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -320,9 +320,10 @@ public:
     [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
                                                  const F32& dref, const F32& lod,
                                                  const Value& offset, TextureInstInfo info);
-    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
     [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
-                                            TextureInstInfo info);
+                                            const IR::U1& skip_mips);
+    [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
+                                            const IR::U1& skip_mips, TextureInstInfo info);
 
     [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
                                       TextureInstInfo info);
@@ -408,7 +409,8 @@ private:
     }
 
     template <typename T>
-    requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
+        requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>)
+    struct Flags {
         Flags() = default;
         Flags(T proxy_) : proxy{proxy_} {}
 
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
index d155afd0f..e300714f3 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.h
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -38,7 +38,6 @@ constexpr Type U8{Type::U8};
 constexpr Type U16{Type::U16};
 constexpr Type U32{Type::U32};
 constexpr Type U64{Type::U64};
-constexpr Type S32{Type::S32};
 constexpr Type F16{Type::F16};
 constexpr Type F32{Type::F32};
 constexpr Type F64{Type::F64};
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 1fe3749cc..4447d67b0 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -175,7 +175,6 @@ OPCODE(SelectF64,                                           F64,            U1,
 OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            )
 OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            )
 OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            )
-OPCODE(BitCastS32F32,                                       S32,            F32,                                                                            )
 OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            )
 OPCODE(BitCastF32U32,                                       F32,            U32,                                                                            )
 OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            )
@@ -483,7 +482,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod,                  F32,            U32,
 OPCODE(BindlessImageGather,                                 F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
 OPCODE(BindlessImageGatherDref,                             F32x4,          U32,            Opaque,         Opaque,         Opaque,         F32,            )
 OPCODE(BindlessImageFetch,                                  F32x4,          U32,            Opaque,         Opaque,         U32,            Opaque,         )
-OPCODE(BindlessImageQueryDimensions,                        U32x4,          U32,            U32,                                                            )
+OPCODE(BindlessImageQueryDimensions,                        U32x4,          U32,            U32,            U1,                                             )
 OPCODE(BindlessImageQueryLod,                               F32x4,          U32,            Opaque,                                                         )
 OPCODE(BindlessImageGradient,                               F32x4,          U32,            Opaque,         Opaque,         Opaque,         Opaque,         )
 OPCODE(BindlessImageRead,                                   U32x4,          U32,            Opaque,                                                         )
@@ -496,7 +495,7 @@ OPCODE(BoundImageSampleDrefExplicitLod,                     F32,            U32,
 OPCODE(BoundImageGather,                                    F32x4,          U32,            Opaque,         Opaque,         Opaque,                         )
 OPCODE(BoundImageGatherDref,                                F32x4,          U32,            Opaque,         Opaque,         Opaque,         F32,            )
 OPCODE(BoundImageFetch,                                     F32x4,          U32,            Opaque,         Opaque,         U32,            Opaque,         )
-OPCODE(BoundImageQueryDimensions,                           U32x4,          U32,            U32,                                                            )
+OPCODE(BoundImageQueryDimensions,                           U32x4,          U32,            U32,            U1,                                             )
 OPCODE(BoundImageQueryLod,                                  F32x4,          U32,            Opaque,                                                         )
 OPCODE(BoundImageGradient,                                  F32x4,          U32,            Opaque,         Opaque,         Opaque,         Opaque,         )
 OPCODE(BoundImageRead,                                      U32x4,          U32,            Opaque,                                                         )
@@ -509,7 +508,7 @@ OPCODE(ImageSampleDrefExplicitLod,                          F32,            Opaq
 OPCODE(ImageGather,                                         F32x4,          Opaque,         Opaque,         Opaque,         Opaque,                         )
 OPCODE(ImageGatherDref,                                     F32x4,          Opaque,         Opaque,         Opaque,         Opaque,         F32,            )
 OPCODE(ImageFetch,                                          F32x4,          Opaque,         Opaque,         Opaque,         U32,            Opaque,         )
-OPCODE(ImageQueryDimensions,                                U32x4,          Opaque,         U32,                                                            )
+OPCODE(ImageQueryDimensions,                                U32x4,          Opaque,         U32,            U1,                                             )
 OPCODE(ImageQueryLod,                                       F32x4,          Opaque,         Opaque,                                                         )
 OPCODE(ImageGradient,                                       F32x4,          Opaque,         Opaque,         Opaque,         Opaque,         Opaque,         )
 OPCODE(ImageRead,                                           U32x4,          Opaque,         Opaque,                                                         )
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
index 5a7c706ad..04c8c4ddb 100644
--- a/src/shader_recompiler/frontend/ir/type.h
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -24,22 +24,21 @@ enum class Type {
     U16 = 1 << 7,
     U32 = 1 << 8,
     U64 = 1 << 9,
-    S32 = 1 << 10,
-    F16 = 1 << 11,
-    F32 = 1 << 12,
-    F64 = 1 << 13,
-    U32x2 = 1 << 14,
-    U32x3 = 1 << 15,
-    U32x4 = 1 << 16,
-    F16x2 = 1 << 17,
-    F16x3 = 1 << 18,
-    F16x4 = 1 << 19,
-    F32x2 = 1 << 20,
-    F32x3 = 1 << 21,
-    F32x4 = 1 << 22,
-    F64x2 = 1 << 23,
-    F64x3 = 1 << 24,
-    F64x4 = 1 << 25,
+    F16 = 1 << 10,
+    F32 = 1 << 11,
+    F64 = 1 << 12,
+    U32x2 = 1 << 13,
+    U32x3 = 1 << 14,
+    U32x4 = 1 << 15,
+    F16x2 = 1 << 16,
+    F16x3 = 1 << 17,
+    F16x4 = 1 << 18,
+    F32x2 = 1 << 19,
+    F32x3 = 1 << 20,
+    F32x4 = 1 << 21,
+    F64x2 = 1 << 22,
+    F64x3 = 1 << 23,
+    F64x4 = 1 << 24,
 };
 DECLARE_ENUM_FLAG_OPERATORS(Type)
 
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
index 30ba12316..346169328 100644
--- a/src/shader_recompiler/frontend/ir/value.cpp
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -23,8 +23,6 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
 
 Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
 
-Value::Value(s32 value) noexcept : type{Type::S32}, imm_s32{value} {}
-
 Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
 
 Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
@@ -71,7 +69,6 @@ bool Value::operator==(const Value& other) const {
         return imm_u16 == other.imm_u16;
     case Type::U32:
     case Type::F32:
-    case Type::S32:
         return imm_u32 == other.imm_u32;
     case Type::U64:
     case Type::F64:
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 8b34356fd..c27546b0e 100644
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -43,7 +43,6 @@ public:
     explicit Value(u8 value) noexcept;
     explicit Value(u16 value) noexcept;
     explicit Value(u32 value) noexcept;
-    explicit Value(s32 value) noexcept;
     explicit Value(f32 value) noexcept;
     explicit Value(u64 value) noexcept;
     explicit Value(f64 value) noexcept;
@@ -66,7 +65,6 @@ public:
     [[nodiscard]] u8 U8() const;
     [[nodiscard]] u16 U16() const;
     [[nodiscard]] u32 U32() const;
-    [[nodiscard]] s32 S32() const;
     [[nodiscard]] f32 F32() const;
     [[nodiscard]] u64 U64() const;
     [[nodiscard]] f64 F64() const;
@@ -86,7 +84,6 @@ private:
         u8 imm_u8;
         u16 imm_u16;
         u32 imm_u32;
-        s32 imm_s32;
         f32 imm_f32;
         u64 imm_u64;
         f64 imm_f64;
@@ -101,9 +98,8 @@ public:
     TypedValue() = default;
 
     template <IR::Type other_type>
-    requires((other_type & type_) != IR::Type::Void) explicit(false)
-        TypedValue(const TypedValue<other_type>& value)
-        : Value(value) {}
+        requires((other_type & type_) != IR::Type::Void)
+    explicit(false) TypedValue(const TypedValue<other_type>& value) : Value(value) {}
 
     explicit TypedValue(const Value& value) : Value(value) {
         if ((value.Type() & type_) == IR::Type::Void) {
@@ -194,16 +190,16 @@ public:
     void ReplaceOpcode(IR::Opcode opcode);
 
     template <typename FlagsType>
-    requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
-        [[nodiscard]] FlagsType Flags() const noexcept {
+        requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+    [[nodiscard]] FlagsType Flags() const noexcept {
         FlagsType ret;
         std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
         return ret;
     }
 
     template <typename FlagsType>
-    requires(sizeof(FlagsType) <= sizeof(u32) &&
-             std::is_trivially_copyable_v<FlagsType>) void SetFlags(FlagsType value) noexcept {
+        requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
+    void SetFlags(FlagsType value) noexcept {
         std::memcpy(&flags, &value, sizeof(value));
     }
 
@@ -268,7 +264,6 @@ using U8 = TypedValue<Type::U8>;
 using U16 = TypedValue<Type::U16>;
 using U32 = TypedValue<Type::U32>;
 using U64 = TypedValue<Type::U64>;
-using S32 = TypedValue<Type::S32>;
 using F16 = TypedValue<Type::F16>;
 using F32 = TypedValue<Type::F32>;
 using F64 = TypedValue<Type::F64>;
@@ -380,14 +375,6 @@ inline u32 Value::U32() const {
     return imm_u32;
 }
 
-inline s32 Value::S32() const {
-    if (IsIdentity()) {
-        return inst->Arg(0).S32();
-    }
-    DEBUG_ASSERT(type == Type::S32);
-    return imm_s32;
-}
-
 inline f32 Value::F32() const {
     if (IsIdentity()) {
         return inst->Arg(0).F32();
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
index 442365a26..c2a0ee6f1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -30,7 +30,7 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi
     union {
         u64 insn;
         BitField<0, 8, IR::Reg> dest_reg;
-        BitField<0, 8, IR::Reg> lo_bits_reg;
+        BitField<8, 8, IR::Reg> lo_bits_reg;
         BitField<37, 2, MaxShift> max_shift;
         BitField<47, 1, u64> cc;
         BitField<48, 2, u64> x_mode;
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
index 639da1e9c..eeb49444f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -102,12 +102,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
         }
         IR::F32 value{v.ir.CompositeExtract(sample, element)};
         if (element < 2) {
-            IR::U32 casted_value;
-            if (element == 0) {
-                casted_value = v.ir.ConvertFToU(32, value);
-            } else {
-                casted_value = v.ir.ConvertFToS(16, value);
-            }
+            IR::U32 casted_value = v.ir.ConvertFToU(32, value);
             v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
         } else {
             v.F(dest_reg, value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
index f8cfd4ab6..39af62559 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -15,11 +15,13 @@ enum class Mode : u64 {
     SamplePos = 5,
 };
 
-IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
+IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg, u64 mask) {
     switch (mode) {
     case Mode::Dimension: {
+        const bool needs_num_mips{((mask >> 3) & 1) != 0};
+        const IR::U1 skip_mips{v.ir.Imm1(!needs_num_mips)};
         const IR::U32 lod{v.X(src_reg)};
-        return v.ir.ImageQueryDimension(handle, lod);
+        return v.ir.ImageQueryDimension(handle, lod, skip_mips);
     }
     case Mode::TextureType:
     case Mode::SamplePos:
@@ -46,7 +48,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
         handle = v.X(src_reg);
         ++src_reg;
     }
-    const IR::Value query{Query(v, handle, txq.mode, src_reg)};
+    const IR::Value query{Query(v, handle, txq.mode, src_reg, txq.mask)};
     IR::Reg dest_reg{txq.dest_reg};
     for (int element = 0; element < 4; ++element) {
         if (((txq.mask >> element) & 1) == 0) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index a42453e90..928b35561 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -280,19 +280,25 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     RemoveUnreachableBlocks(program);
 
     // Replace instructions before the SSA rewrite
+    if (!host_info.support_float64) {
+        Optimization::LowerFp64ToFp32(program);
+    }
     if (!host_info.support_float16) {
         Optimization::LowerFp16ToFp32(program);
     }
     if (!host_info.support_int64) {
         Optimization::LowerInt64ToInt32(program);
     }
+    if (!host_info.support_conditional_barrier) {
+        Optimization::ConditionalBarrierPass(program);
+    }
     Optimization::SsaRewritePass(program);
 
     Optimization::ConstantPropagationPass(env, program);
 
     Optimization::PositionPass(env, program);
 
-    Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
+    Optimization::GlobalMemoryToStorageBufferPass(program);
     Optimization::TexturePass(env, program, host_info);
 
     if (Settings::values.resolution_info.active) {
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 55fc48768..7d2ded907 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -10,14 +10,16 @@ namespace Shader {
 
 /// Misc information about the host
 struct HostTranslateInfo {
+    bool support_float64{};      ///< True when the device supports 64-bit floats
     bool support_float16{};      ///< True when the device supports 16-bit floats
     bool support_int64{};        ///< True when the device supports 64-bit integers
     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
     bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
-    u32 min_ssbo_alignment{};            ///< Minimum alignment supported by the device for SSBOs
     bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                 ///< passthrough shaders
+    bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
+                                        ///< control flow
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 5a4195217..70292686f 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -424,6 +424,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
         info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
         break;
+    case IR::Opcode::LoadLocal:
+    case IR::Opcode::WriteLocal:
+        info.uses_local_memory = true;
+        break;
     default:
         break;
     }
diff --git a/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
new file mode 100644
index 000000000..c3ed27f4f
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+void ConditionalBarrierPass(IR::Program& program) {
+    using NodeType = IR::AbstractSyntaxNode::Type;
+    // Current nesting depth of If/Loop constructs while walking the syntax list in order
+    s32 conditional_control_flow_count{0};
+    // Number of Return/Unreachable nodes that were found while nested inside an If/Loop
+    s32 nested_exit_count{0};
+    for (IR::AbstractSyntaxNode& node : program.syntax_list) {
+        const NodeType kind{node.type};
+        if (kind == NodeType::If || kind == NodeType::Loop) {
+            ++conditional_control_flow_count;
+            continue;
+        }
+        if (kind == NodeType::EndIf || kind == NodeType::Repeat) {
+            --conditional_control_flow_count;
+            continue;
+        }
+        if (kind == NodeType::Unreachable || kind == NodeType::Return) {
+            nested_exit_count += conditional_control_flow_count > 0 ? 1 : 0;
+            continue;
+        }
+        if (kind != NodeType::Block) {
+            continue;
+        }
+        // A barrier is dropped when nested, or when it comes after a nested Return/Unreachable
+        const bool is_conditional{conditional_control_flow_count > 0 || nested_exit_count > 0};
+        for (IR::Inst& inst : node.data.block->Instructions()) {
+            if (is_conditional && inst.GetOpcode() == IR::Opcode::Barrier) {
+                LOG_WARNING(Shader, "Barrier within conditional control flow");
+                inst.ReplaceOpcode(IR::Opcode::Identity);
+            }
+        }
+    }
+    ASSERT(conditional_control_flow_count == 0);
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 9101722ba..d1e59f22e 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,7 +11,6 @@
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/value.h"
-#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
@@ -36,6 +35,7 @@ struct Bias {
     u32 index;
     u32 offset_begin;
     u32 offset_end;
+    u32 alignment;
 };
 
 using boost::container::flat_set;
@@ -350,7 +350,8 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
             .index = index.U32(),
             .offset = offset.U32(),
         };
-        if (!Common::IsAligned(storage_buffer.offset, 16)) {
+        const u32 alignment{bias ? bias->alignment : 8U};
+        if (!Common::IsAligned(storage_buffer.offset, alignment)) {
             // The SSBO pointer has to be aligned
             return std::nullopt;
         }
@@ -372,6 +373,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
         .index = 0,
         .offset_begin = 0x110,
         .offset_end = 0x610,
+        .alignment = 16,
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -387,8 +389,11 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
+            LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks");
             return;
         }
+        LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}",
+                    storage_buffer->index, storage_buffer->offset);
     }
     // Collect storage buffer and the instruction
     if (IsGlobalMemoryWrite(inst)) {
@@ -403,7 +408,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 }
 
 /// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::U32 offset;
     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -416,10 +421,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
-    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
-
-    // Align the offset base to match the host alignment requirements
-    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
+    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
     return ir.ISub(offset, low_cbuf);
 }
 
@@ -514,7 +516,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
 }
 } // Anonymous namespace
 
-void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program) {
     StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -538,8 +540,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{
-            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
+        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
         Replace(*block, *inst, index, offset);
     }
 }
diff --git a/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
new file mode 100644
index 000000000..5db7a38ad
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
@@ -0,0 +1,185 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/opcodes.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+namespace {
+
+constexpr s32 F64ToF32Exp = +127 - 1023; // Added to an F64 biased exponent to rebias it for F32
+constexpr s32 F32ToF64Exp = +1023 - 127; // Added to an F32 biased exponent to rebias it for F64
+
+IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
+    // Split the raw double into sign, 11-bit biased exponent and the top 23 mantissa bits
+    const IR::U32 low_word{ir.CompositeExtract(packed, 0)};
+    const IR::U32 high_word{ir.CompositeExtract(packed, 1)};
+    const IR::U32 sign_bit{ir.BitFieldExtract(high_word, ir.Imm32(31), ir.Imm32(1))};
+    const IR::U32 exp64{ir.BitFieldExtract(high_word, ir.Imm32(20), ir.Imm32(11))};
+    const IR::U32 frac_top{ir.BitFieldExtract(high_word, ir.Imm32(0), ir.Imm32(20))};
+    const IR::U32 frac_low{ir.BitFieldExtract(low_word, ir.Imm32(29), ir.Imm32(3))};
+    const IR::U32 frac{ir.BitwiseOr(ir.ShiftLeftLogical(frac_top, ir.Imm32(3)), frac_low)};
+    // A zero exponent stays zero and 0x7ff becomes 0xff; any other exponent gets F64ToF32Exp added
+    const IR::U32 offset_exp{ir.IAdd(exp64, ir.Imm32(F64ToF32Exp))};
+    const IR::U32 exp_or_zero{ir.Select(ir.IEqual(exp64, ir.Imm32(0)), ir.Imm32(0), offset_exp)};
+    const IR::U32 exp32{ir.Select(ir.IEqual(exp64, ir.Imm32(0x7ff)), ir.Imm32(0xff), exp_or_zero)};
+    // Reassemble as sign << 31 | exponent << 23 | mantissa and reinterpret the bits as a float
+    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
+    const IR::U32 exp_field{ir.ShiftLeftLogical(exp32, ir.Imm32(23))};
+    return ir.BitCast<IR::F32>(ir.BitwiseOr(sign_field, ir.BitwiseOr(exp_field, frac)));
+}
+
+IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
+    // Split the raw float into sign, 8-bit biased exponent and 23-bit mantissa (top 20 / low 3)
+    const IR::U32 bits{ir.BitCast<IR::U32>(IR::F32(raw))};
+    const IR::U32 sign_bit{ir.BitFieldExtract(bits, ir.Imm32(31), ir.Imm32(1))};
+    const IR::U32 exp32{ir.BitFieldExtract(bits, ir.Imm32(23), ir.Imm32(8))};
+    const IR::U32 frac{ir.BitFieldExtract(bits, ir.Imm32(0), ir.Imm32(23))};
+    const IR::U32 frac_top{ir.BitFieldExtract(frac, ir.Imm32(3), ir.Imm32(20))};
+    const IR::U32 frac_low{ir.BitFieldExtract(frac, ir.Imm32(0), ir.Imm32(3))};
+    // A zero exponent stays zero and 0xff becomes 0x7ff; any other exponent gets F32ToF64Exp added
+    const IR::U32 offset_exp{ir.IAdd(exp32, ir.Imm32(F32ToF64Exp))};
+    const IR::U32 exp_or_zero{ir.Select(ir.IEqual(exp32, ir.Imm32(0)), ir.Imm32(0), offset_exp)};
+    const IR::U32 exp64{ir.Select(ir.IEqual(exp32, ir.Imm32(0xff)), ir.Imm32(0x7ff), exp_or_zero)};
+    const IR::U32 lo{ir.ShiftLeftLogical(frac_low, ir.Imm32(29))};
+    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
+    const IR::U32 exp_field{ir.ShiftLeftLogical(exp64, ir.Imm32(20))};
+    return ir.CompositeConstruct(lo, ir.BitwiseOr(sign_field, ir.BitwiseOr(exp_field, frac_top)));
+}
+
+IR::Opcode Replace(IR::Opcode op) { // Returns the opcode that should stand in for `op`
+    switch (op) {
+    case IR::Opcode::FPAbs64:
+        return IR::Opcode::FPAbs32;
+    case IR::Opcode::FPAdd64:
+        return IR::Opcode::FPAdd32;
+    case IR::Opcode::FPCeil64:
+        return IR::Opcode::FPCeil32;
+    case IR::Opcode::FPFloor64:
+        return IR::Opcode::FPFloor32;
+    case IR::Opcode::FPFma64:
+        return IR::Opcode::FPFma32;
+    case IR::Opcode::FPMul64:
+        return IR::Opcode::FPMul32;
+    case IR::Opcode::FPNeg64:
+        return IR::Opcode::FPNeg32;
+    case IR::Opcode::FPRoundEven64:
+        return IR::Opcode::FPRoundEven32;
+    case IR::Opcode::FPSaturate64:
+        return IR::Opcode::FPSaturate32;
+    case IR::Opcode::FPClamp64:
+        return IR::Opcode::FPClamp32;
+    case IR::Opcode::FPTrunc64:
+        return IR::Opcode::FPTrunc32;
+    case IR::Opcode::CompositeConstructF64x2:
+        return IR::Opcode::CompositeConstructF32x2;
+    case IR::Opcode::CompositeConstructF64x3:
+        return IR::Opcode::CompositeConstructF32x3;
+    case IR::Opcode::CompositeConstructF64x4:
+        return IR::Opcode::CompositeConstructF32x4;
+    case IR::Opcode::CompositeExtractF64x2:
+        return IR::Opcode::CompositeExtractF32x2;
+    case IR::Opcode::CompositeExtractF64x3:
+        return IR::Opcode::CompositeExtractF32x3;
+    case IR::Opcode::CompositeExtractF64x4:
+        return IR::Opcode::CompositeExtractF32x4;
+    case IR::Opcode::CompositeInsertF64x2:
+        return IR::Opcode::CompositeInsertF32x2;
+    case IR::Opcode::CompositeInsertF64x3:
+        return IR::Opcode::CompositeInsertF32x3;
+    case IR::Opcode::CompositeInsertF64x4:
+        return IR::Opcode::CompositeInsertF32x4;
+    case IR::Opcode::FPOrdEqual64:
+        return IR::Opcode::FPOrdEqual32;
+    case IR::Opcode::FPUnordEqual64:
+        return IR::Opcode::FPUnordEqual32;
+    case IR::Opcode::FPOrdNotEqual64:
+        return IR::Opcode::FPOrdNotEqual32;
+    case IR::Opcode::FPUnordNotEqual64:
+        return IR::Opcode::FPUnordNotEqual32;
+    case IR::Opcode::FPOrdLessThan64:
+        return IR::Opcode::FPOrdLessThan32;
+    case IR::Opcode::FPUnordLessThan64:
+        return IR::Opcode::FPUnordLessThan32;
+    case IR::Opcode::FPOrdGreaterThan64:
+        return IR::Opcode::FPOrdGreaterThan32;
+    case IR::Opcode::FPUnordGreaterThan64:
+        return IR::Opcode::FPUnordGreaterThan32;
+    case IR::Opcode::FPOrdLessThanEqual64:
+        return IR::Opcode::FPOrdLessThanEqual32;
+    case IR::Opcode::FPUnordLessThanEqual64:
+        return IR::Opcode::FPUnordLessThanEqual32;
+    case IR::Opcode::FPOrdGreaterThanEqual64:
+        return IR::Opcode::FPOrdGreaterThanEqual32;
+    case IR::Opcode::FPUnordGreaterThanEqual64:
+        return IR::Opcode::FPUnordGreaterThanEqual32;
+    case IR::Opcode::FPIsNan64:
+        return IR::Opcode::FPIsNan32;
+    case IR::Opcode::ConvertS16F64:
+        return IR::Opcode::ConvertS16F32;
+    case IR::Opcode::ConvertS32F64:
+        return IR::Opcode::ConvertS32F32;
+    case IR::Opcode::ConvertS64F64:
+        return IR::Opcode::ConvertS64F32;
+    case IR::Opcode::ConvertU16F64:
+        return IR::Opcode::ConvertU16F32;
+    case IR::Opcode::ConvertU32F64:
+        return IR::Opcode::ConvertU32F32;
+    case IR::Opcode::ConvertU64F64:
+        return IR::Opcode::ConvertU64F32;
+    case IR::Opcode::ConvertF32F64: // Presumably a no-op once F64 values are carried as F32
+        return IR::Opcode::Identity;
+    case IR::Opcode::ConvertF64F32: // Presumably a no-op once F64 values are carried as F32
+        return IR::Opcode::Identity;
+    case IR::Opcode::ConvertF64S8:
+        return IR::Opcode::ConvertF32S8;
+    case IR::Opcode::ConvertF64S16:
+        return IR::Opcode::ConvertF32S16;
+    case IR::Opcode::ConvertF64S32:
+        return IR::Opcode::ConvertF32S32;
+    case IR::Opcode::ConvertF64S64:
+        return IR::Opcode::ConvertF32S64;
+    case IR::Opcode::ConvertF64U8:
+        return IR::Opcode::ConvertF32U8;
+    case IR::Opcode::ConvertF64U16:
+        return IR::Opcode::ConvertF32U16;
+    case IR::Opcode::ConvertF64U32:
+        return IR::Opcode::ConvertF32U32;
+    case IR::Opcode::ConvertF64U64:
+        return IR::Opcode::ConvertF32U64;
+    default: // Opcodes without an entry above are returned unchanged
+        return op;
+    }
+}
+
+void Lower(IR::Block& block, IR::Inst& inst) {
+    const IR::Opcode opcode{inst.GetOpcode()};
+    const bool is_pack{opcode == IR::Opcode::PackDouble2x32};
+    const bool is_unpack{opcode == IR::Opcode::UnpackDouble2x32};
+    if (!is_pack && !is_unpack) {
+        // Any other instruction only has its opcode mapped through Replace
+        inst.ReplaceOpcode(Replace(opcode));
+        return;
+    }
+    // Pack/unpack are rebuilt from integer bit operations using an emitter placed at `inst`
+    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
+    if (is_pack) {
+        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
+    } else {
+        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
+    }
+}
+
+} // Anonymous namespace
+
+void LowerFp64ToFp32(IR::Program& program) { // Runs Lower on every instruction of every block
+    for (IR::Block* const current : program.blocks) {
+        for (IR::Inst& candidate : current->Instructions()) {
+            Lower(*current, candidate);
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 4ffad1172..629d18fa1 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -13,10 +13,12 @@ struct HostTranslateInfo;
 namespace Shader::Optimization {
 
 void CollectShaderInfoPass(Environment& env, IR::Program& program);
+void ConditionalBarrierPass(IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
+void GlobalMemoryToStorageBufferPass(IR::Program& program);
 void IdentityRemovalPass(IR::Program& program);
+void LowerFp64ToFp32(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
 void LowerInt64ToInt32(IR::Program& program);
 void RescalingPass(IR::Program& program);
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index f5c86fcb1..d374c976a 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -355,21 +355,21 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
     };
 }
 
-TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+u32 GetTextureHandle(Environment& env, const ConstBufferAddr& cbuf) {
     const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
     const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
     const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset) << cbuf.shift_left};
     const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)
                       << cbuf.secondary_shift_left};
-    return env.ReadTextureType(lhs_raw | rhs_raw);
+    return lhs_raw | rhs_raw;
+}
+
+TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) {
+    return env.ReadTextureType(GetTextureHandle(env, cbuf));
 }
 
 TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAddr& cbuf) {
-    const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index};
-    const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset};
-    const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)};
-    const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)};
-    return env.ReadTexturePixelFormat(lhs_raw | rhs_raw);
+    return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
 }
 
 class Descriptors {
@@ -386,8 +386,10 @@ public:
         return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) {
             return desc.cbuf_index == existing.cbuf_index &&
                    desc.cbuf_offset == existing.cbuf_offset &&
+                   desc.shift_left == existing.shift_left &&
                    desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
                    desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+                   desc.secondary_shift_left == existing.secondary_shift_left &&
                    desc.count == existing.count && desc.size_shift == existing.size_shift &&
                    desc.has_secondary == existing.has_secondary;
         });
@@ -405,15 +407,20 @@ public:
     }
 
     u32 Add(const TextureDescriptor& desc) {
-        return Add(texture_descriptors, desc, [&desc](const auto& existing) {
+        const u32 index{Add(texture_descriptors, desc, [&desc](const auto& existing) {
             return desc.type == existing.type && desc.is_depth == existing.is_depth &&
                    desc.has_secondary == existing.has_secondary &&
                    desc.cbuf_index == existing.cbuf_index &&
                    desc.cbuf_offset == existing.cbuf_offset &&
+                   desc.shift_left == existing.shift_left &&
                    desc.secondary_cbuf_index == existing.secondary_cbuf_index &&
                    desc.secondary_cbuf_offset == existing.secondary_cbuf_offset &&
+                   desc.secondary_shift_left == existing.secondary_shift_left &&
                    desc.count == existing.count && desc.size_shift == existing.size_shift;
-        });
+        })};
+        // TODO: Read this from TIC
+        texture_descriptors[index].is_multisample |= desc.is_multisample;
+        return index;
     }
 
     u32 Add(const ImageDescriptor& desc) {
@@ -452,7 +459,8 @@ void PatchImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     const IR::Value coord(inst.Arg(1));
     const IR::Value handle(ir.Imm32(0));
     const IR::U32 lod{ir.Imm32(0)};
-    const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, info);
+    const IR::U1 skip_mips{ir.Imm1(true)};
+    const IR::Value texture_size = ir.ImageQueryDimension(handle, lod, skip_mips, info);
     inst.SetArg(
         1, ir.CompositeConstruct(
                ir.FPMul(IR::F32(ir.CompositeExtract(coord, 0)),
@@ -486,10 +494,10 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_
     const IR::F32 w(ir.CompositeExtract(new_inst, 3));
     const IR::F16F32F64 max_value(ir.Imm32(get_max_value()));
     const IR::Value converted =
-        ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(x)), max_value),
-                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(y)), max_value),
-                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(z)), max_value),
-                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::S32>(w)), max_value));
+        ir.CompositeConstruct(ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(x)), max_value),
+                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(y)), max_value),
+                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(z)), max_value),
+                              ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value));
     inst.ReplaceUsesWith(converted);
 }
 } // Anonymous namespace
@@ -524,6 +532,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
 
         const auto& cbuf{texture_inst.cbuf};
         auto flags{inst->Flags<IR::TextureInstInfo>()};
+        bool is_multisample{false};
         switch (inst->GetOpcode()) {
         case IR::Opcode::ImageQueryDimensions:
             flags.type.Assign(ReadTextureType(env, cbuf));
@@ -538,6 +547,12 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
             }
             break;
         case IR::Opcode::ImageFetch:
+            if (flags.type == TextureType::Color2D || flags.type == TextureType::Color2DRect ||
+                flags.type == TextureType::ColorArray2D) {
+                is_multisample = !inst->Arg(4).IsEmpty();
+            } else {
+                inst->SetArg(4, IR::U32{});
+            }
             if (flags.type != TextureType::Color1D) {
                 break;
             }
@@ -613,6 +628,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
                 index = descriptors.Add(TextureDescriptor{
                     .type = flags.type,
                     .is_depth = flags.is_depth != 0,
+                    .is_multisample = is_multisample,
                     .has_secondary = cbuf.has_secondary,
                     .cbuf_index = cbuf.index,
                     .cbuf_offset = cbuf.offset,
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index 2b42c4ba2..5d648b159 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -10,7 +10,7 @@
 namespace Shader {
 
 template <typename T>
-requires std::is_destructible_v<T>
+    requires std::is_destructible_v<T>
 class ObjectPool {
 public:
     explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
@@ -18,7 +18,7 @@ public:
     }
 
     template <typename... Args>
-    requires std::is_constructible_v<T, Args...>
+        requires std::is_constructible_v<T, Args...>
     [[nodiscard]] T* Create(Args&&... args) {
         return std::construct_at(Memory(), std::forward<Args>(args)...);
     }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 253e0d0bd..9ca97f6a4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
     bool support_gl_variable_aoffi{};
     bool support_gl_sparse_textures{};
     bool support_gl_derivative_control{};
+    bool support_scaled_attributes{};
 
     bool warp_size_potentially_larger_than_guest{};
 
@@ -52,6 +53,10 @@ struct Profile {
     bool need_declared_frag_colors{};
     /// Prevents fast math optimizations that may cause inaccuracies
     bool need_fastmath_off{};
+    /// Some GPU vendors round texture pixel coordinates in the 16.8 format differently
+    /// from the Maxwell architecture when executing the ImageGather instruction.
+    /// Applying a subpixel offset compensates for this rounding mismatch.
+    bool need_gather_subpixel_offset{};
 
     /// OpFClamp is broken and OpFMax + OpFMin should be used instead
     bool has_broken_spirv_clamp{};
@@ -73,6 +78,8 @@ struct Profile {
     bool has_gl_bool_ref_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
+    /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
+    bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
 
     u32 gl_max_compute_smem_size{};
 };
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 549b81ef7..619c0b138 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -17,6 +17,8 @@ enum class AttributeType : u8 {
     Float,
     SignedInt,
     UnsignedInt,
+    SignedScaled,
+    UnsignedScaled,
     Disabled,
 };
 
@@ -82,7 +84,8 @@ struct RuntimeInfo {
     bool glasm_use_storage_buffers{};
 
     /// Transform feedback state for each varying
-    std::vector<TransformFeedbackVarying> xfb_varyings;
+    std::array<TransformFeedbackVarying, 256> xfb_varyings{};
+    u32 xfb_count{0};
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index f93181e1e..b4b4afd37 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -109,6 +109,7 @@ using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescrip
 struct TextureDescriptor {
     TextureType type;
     bool is_depth;
+    bool is_multisample;
     bool has_secondary;
     u32 cbuf_index;
     u32 cbuf_offset;
@@ -171,6 +172,7 @@ struct Info {
     bool stores_indexed_attributes{};
 
     bool stores_global_memory{};
+    bool uses_local_memory{};
 
     bool uses_fp16{};
     bool uses_fp64{};