6 files changed, 145 insertions, 98 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index ec3601e8b..097416e9e 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -316,6 +316,11 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
                       "(STUBBED) Attempted to query privileged process id bounds, returned 0");
         *result = 0;
         break;
+    case GetInfoType::UserExceptionContextAddr:
+        NGLOG_WARNING(Kernel_SVC,
+                      "(STUBBED) Attempted to query user exception context address, returned 0");
+        *result = 0;
+        break;
     default:
         UNIMPLEMENTED();
     }
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 29d88192e..cefd57f4c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -230,10 +230,18 @@ union Instruction {
         } fmnmx;
 
         union {
+            BitField<39, 1, u64> invert_a;
+            BitField<40, 1, u64> invert_b;
+            BitField<41, 2, LogicOperation> operation;
+            BitField<44, 2, u64> unk44;
+            BitField<48, 3, Pred> pred48;
+        } lop;
+
+        union {
             BitField<53, 2, LogicOperation> operation;
             BitField<55, 1, u64> invert_a;
             BitField<56, 1, u64> invert_b;
-        } lop;
+        } lop32i;
 
         float GetImm20_19() const {
             float result{};
@@ -343,7 +351,8 @@ union Instruction {
     } iset;
 
     union {
-        BitField<10, 2, Register::Size> size;
+        BitField<8, 2, Register::Size> dest_size;
+        BitField<10, 2, Register::Size> src_size;
         BitField<12, 1, u64> is_output_signed;
         BitField<13, 1, u64> is_input_signed;
         BitField<41, 2, u64> selector;
@@ -475,6 +484,9 @@ public:
         I2I_C,
         I2I_R,
         I2I_IMM,
+        LOP_C,
+        LOP_R,
+        LOP_IMM,
         LOP32I,
         MOV_C,
         MOV_R,
@@ -517,7 +529,6 @@ public:
         ArithmeticInteger,
         ArithmeticIntegerImmediate,
         Bfe,
-        Logic,
         Shift,
         Ffma,
         Flow,
@@ -675,7 +686,10 @@ private:
             INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
             INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
             INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
-            INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+            INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
+            INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
+            INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+            INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
             INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
             INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
             INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6f05f24a0..833ff8273 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -740,7 +740,6 @@ void RasterizerOpenGL::SyncDepthOffset() {
 
 void RasterizerOpenGL::SyncBlendState() {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-    ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
 
     // TODO(Subv): Support more than just render target 0.
     state.blend.enabled = regs.blend.enable[0] != 0;
@@ -748,6 +747,7 @@ void RasterizerOpenGL::SyncBlendState() {
     if (!state.blend.enabled)
         return;
 
+    ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
     ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
     state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
     state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ff48a2669..e61960cc0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -888,9 +888,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
     // Use GetSurfaceSubRect instead
     ASSERT(params.width == params.stride);
 
-    ASSERT(!params.is_tiled ||
-           (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
-
     // Check for an exact match in existing surfaces
     Surface surface =
         FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
@@ -1048,8 +1045,13 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
 
     if (config.tic.IsTiled()) {
         params.block_height = config.tic.BlockHeight();
-        params.width = Common::AlignUp(params.width, params.block_height);
-        params.height = Common::AlignUp(params.height, params.block_height);
+
+        // TODO(bunnei): The below align up is a hack. This is here because some compressed textures
+        // are not a multiple of their own compression factor, and so this accounts for that. This
+        // could potentially result in an extra row of 4px being decoded if a texture is not a
+        // multiple of 4.
+        params.width = Common::AlignUp(params.width, 4);
+        params.height = Common::AlignUp(params.height, 4);
     } else {
         // Use the texture-provided stride value if the texture isn't tiled.
         params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
@@ -1057,26 +1059,6 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
 
     params.UpdateParams();
 
-    if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 ||
-        params.stride != params.width) {
-        Surface src_surface;
-        MathUtil::Rectangle<u32> rect;
-        std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
-
-        rect = rect.Scale(params.GetCompresssionFactor());
-
-        params.res_scale = src_surface->res_scale;
-        Surface tmp_surface = CreateSurface(params);
-
-        auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor());
-        BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect,
-                     SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
-                     draw_framebuffer.handle);
-
-        remove_surfaces.emplace(tmp_surface);
-        return tmp_surface;
-    }
-
     return GetSurface(params, ScaleMatch::Ignore, true);
 }
 
@@ -1251,7 +1233,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVA
 
         const auto interval = *it & validate_interval;
         // Look for a valid surface to copy from
-        SurfaceParams params = surface->FromInterval(interval);
+        SurfaceParams params = *surface;
 
         Surface copy_surface =
             FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7ce150fda..6ec0a0742 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -16,6 +16,7 @@ namespace Decompiler {
 
 using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 using Tegra::Shader::Sampler;
@@ -266,6 +267,27 @@ public:
     }
 
     /**
+     * Returns code that does an integer size conversion for the specified size.
+     * @param value Value to perform integer size conversion on.
+     * @param size Register size to use for conversion instructions.
+     * @returns GLSL string corresponding to the value converted to the specified size.
+     */
+    static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
+        switch (size) {
+        case Register::Size::Byte:
+            return "((" + value + " << 24) >> 24)";
+        case Register::Size::Short:
+            return "((" + value + " << 16) >> 16)";
+        case Register::Size::Word:
+            // Default - do nothing
+            return value;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size));
+            UNREACHABLE();
+        }
+    }
+
+    /**
      * Gets a register as an float.
      * @param reg The register to get.
      * @param elem The element to use for the operation.
@@ -281,15 +303,18 @@ public:
      * @param reg The register to get.
      * @param elem The element to use for the operation.
      * @param is_signed Whether to get the register as a signed (or unsigned) integer.
+     * @param size Register size to use for conversion instructions.
      * @returns GLSL string corresponding to the register as an integer.
      */
-    std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0,
-                                     bool is_signed = true) {
+    std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
+                                     Register::Size size = Register::Size::Word) {
         const std::string func = GetGLSLConversionFunc(
             GLSLRegister::Type::Float,
             is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
 
-        return func + '(' + GetRegister(reg, elem) + ')';
+        std::string value = func + '(' + GetRegister(reg, elem) + ')';
+
+        return ConvertIntegerSize(value, size);
     }
 
     /**
@@ -319,19 +344,20 @@ public:
      * @param value_num_components Number of components in the value.
      * @param is_saturated Optional, when True, saturates the provided value.
      * @param dest_elem Optional, the destination element to use for the operation.
+     * @param size Register size to use for conversion instructions.
      */
     void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
                               const std::string& value, u64 dest_num_components,
                               u64 value_num_components, bool is_saturated = false,
-                              u64 dest_elem = 0) {
+                              u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
         ASSERT_MSG(!is_saturated, "Unimplemented");
 
         const std::string func = GetGLSLConversionFunc(
             is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
             GLSLRegister::Type::Float);
 
-        SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components,
-                    dest_elem);
+        SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
+                    dest_num_components, value_num_components, dest_elem);
     }
 
     /**
@@ -734,6 +760,31 @@ private:
         return (absolute_offset % SchedPeriod) == 0;
     }
 
+    void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
+                             const std::string& op_b) {
+        switch (logic_op) {
+        case LogicOperation::And: {
+            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+            break;
+        }
+        case LogicOperation::Or: {
+            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+            break;
+        }
+        case LogicOperation::Xor: {
+            regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+            break;
+        }
+        case LogicOperation::PassB: {
+            regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+            break;
+        }
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
+            UNREACHABLE();
+        }
+    }
+
     /**
      * Compiles a single instruction from Tegra to GLSL.
      * @param offset the offset of the Tegra shader instruction.
@@ -917,49 +968,6 @@ private:
 
             break;
         }
-        case OpCode::Type::Logic: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
-
-            if (instr.alu.lop.invert_a)
-                op_a = "~(" + op_a + ')';
-
-            switch (opcode->GetId()) {
-            case OpCode::Id::LOP32I: {
-                u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
-
-                if (instr.alu.lop.invert_b)
-                    imm = ~imm;
-
-                switch (instr.alu.lop.operation) {
-                case Tegra::Shader::LogicOperation::And: {
-                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                              '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
-                    break;
-                }
-                case Tegra::Shader::LogicOperation::Or: {
-                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                              '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
-                    break;
-                }
-                case Tegra::Shader::LogicOperation::Xor: {
-                    regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                              '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
-                    break;
-                }
-                default:
-                    NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
-                                   static_cast<u32>(instr.alu.lop.operation.Value()));
-                    UNREACHABLE();
-                }
-                break;
-            }
-            default: {
-                NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
-                UNREACHABLE();
-            }
-            }
-            break;
-        }
 
         case OpCode::Type::Shift: {
             std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
@@ -1005,17 +1013,26 @@ private:
 
         case OpCode::Type::ArithmeticIntegerImmediate: {
             std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
-
-            if (instr.iadd32i.negate_a)
-                op_a = '-' + op_a;
-
-            std::string op_b = '(' + std::to_string(instr.alu.imm20_32.Value()) + ')';
+            std::string op_b = std::to_string(instr.alu.imm20_32.Value());
 
             switch (opcode->GetId()) {
             case OpCode::Id::IADD32I:
+                if (instr.iadd32i.negate_a)
+                    op_a = "-(" + op_a + ')';
+
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
                                           instr.iadd32i.saturate != 0);
                 break;
+            case OpCode::Id::LOP32I: {
+                if (instr.alu.lop32i.invert_a)
+                    op_a = "~(" + op_a + ')';
+
+                if (instr.alu.lop32i.invert_b)
+                    op_b = "~(" + op_b + ')';
+
+                WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+                break;
+            }
             default: {
                 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
                                opcode->GetName());
@@ -1026,12 +1043,7 @@ private:
         }
         case OpCode::Type::ArithmeticInteger: {
             std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
-
-            if (instr.alu_integer.negate_a)
-                op_a = '-' + op_a;
-
-            std::string op_b = instr.alu_integer.negate_b ? "-" : "";
-
+            std::string op_b;
             if (instr.is_b_imm) {
                 op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
             } else {
@@ -1047,6 +1059,12 @@ private:
             case OpCode::Id::IADD_C:
             case OpCode::Id::IADD_R:
             case OpCode::Id::IADD_IMM: {
+                if (instr.alu_integer.negate_a)
+                    op_a = "-(" + op_a + ')';
+
+                if (instr.alu_integer.negate_b)
+                    op_b = "-(" + op_b + ')';
+
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
                                           instr.alu.saturate_d);
                 break;
@@ -1054,12 +1072,33 @@ private:
             case OpCode::Id::ISCADD_C:
             case OpCode::Id::ISCADD_R:
             case OpCode::Id::ISCADD_IMM: {
+                if (instr.alu_integer.negate_a)
+                    op_a = "-(" + op_a + ')';
+
+                if (instr.alu_integer.negate_b)
+                    op_b = "-(" + op_b + ')';
+
                 std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                           "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
                 break;
             }
+            case OpCode::Id::LOP_C:
+            case OpCode::Id::LOP_R:
+            case OpCode::Id::LOP_IMM: {
+                ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
+                ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
+
+                if (instr.alu.lop.invert_a)
+                    op_a = "~(" + op_a + ')';
+
+                if (instr.alu.lop.invert_b)
+                    op_b = "~(" + op_b + ')';
+
+                WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+                break;
+            }
             default: {
                 NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
                                opcode->GetName());
@@ -1108,28 +1147,28 @@ private:
             break;
         }
         case OpCode::Type::Conversion: {
-            ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
             ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
 
             switch (opcode->GetId()) {
             case OpCode::Id::I2I_R: {
                 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
 
-                std::string op_a =
-                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+                std::string op_a = regs.GetRegisterAsInteger(
+                    instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
 
                 if (instr.conversion.abs_a) {
                     op_a = "abs(" + op_a + ')';
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, instr.alu.saturate_d);
+                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
                 break;
             }
             case OpCode::Id::I2F_R: {
+                ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
                 ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
-                std::string op_a =
-                    regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed);
+                std::string op_a = regs.GetRegisterAsInteger(
+                    instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
 
                 if (instr.conversion.abs_a) {
                     op_a = "abs(" + op_a + ')';
@@ -1139,6 +1178,8 @@ private:
                 break;
             }
             case OpCode::Id::F2F_R: {
+                ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
+                ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
                 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
 
                 switch (instr.conversion.f2f.rounding) {
@@ -1168,6 +1209,7 @@ private:
                 break;
             }
             case OpCode::Id::F2I_R: {
+                ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
                 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
 
                 if (instr.conversion.abs_a) {
@@ -1200,7 +1242,7 @@ private:
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1);
+                                          1, false, 0, instr.conversion.dest_size);
                 break;
             }
             default: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b88d592b7..c1e6fac9f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -39,6 +39,10 @@ void main() {
     // Viewport can be flipped, which is unsupported by glViewport
     position.xy *= viewport_flip.xy;
     gl_Position = position;
+
+    // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
+    // For now, this is here to bring order in lieu of proper emulation
+    position.w = 1.0;
 }
 )";
     out += program.first;