7 files changed, 120 insertions, 98 deletions
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index 92484c526..de4e88cae 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -244,17 +244,27 @@ void Source::GenerateFrame() {
             break;
         }
 
-        const size_t size_to_copy =
-            std::min(state.current_buffer.size(), current_frame.size() - frame_position);
-
-        std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy,
-                  current_frame.begin() + frame_position);
-        state.current_buffer.erase(state.current_buffer.begin(),
-                                   state.current_buffer.begin() + size_to_copy);
-
-        frame_position += size_to_copy;
-        state.next_sample_number += static_cast<u32>(size_to_copy);
+        switch (state.interpolation_mode) {
+        case InterpolationMode::None:
+            AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier,
+                              current_frame, frame_position);
+            break;
+        case InterpolationMode::Linear:
+            AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
+                                current_frame, frame_position);
+            break;
+        case InterpolationMode::Polyphase:
+            // TODO(merry): Implement polyphase interpolation
+            LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear");
+            AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
+                                current_frame, frame_position);
+            break;
+        default:
+            UNIMPLEMENTED();
+            break;
+        }
     }
+    state.next_sample_number += frame_position;
 
     state.filters.ProcessFrame(current_frame);
 }
@@ -305,25 +315,6 @@ bool Source::DequeueBuffer() {
         return true;
     }
 
-    switch (state.interpolation_mode) {
-    case InterpolationMode::None:
-        state.current_buffer =
-            AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier);
-        break;
-    case InterpolationMode::Linear:
-        state.current_buffer =
-            AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
-        break;
-    case InterpolationMode::Polyphase:
-        // TODO(merry): Implement polyphase interpolation
-        state.current_buffer =
-            AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
-        break;
-    default:
-        UNIMPLEMENTED();
-        break;
-    }
-
     // the first playthrough starts at play_position, loops start at the beginning of the buffer
     state.current_sample_number = (!buf.has_played) ? buf.play_position : 0;
     state.next_sample_number = state.current_sample_number;
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
index 8a5d4181a..16e68bc5c 100644
--- a/src/audio_core/interpolate.cpp
+++ b/src/audio_core/interpolate.cpp
@@ -13,74 +13,64 @@ namespace AudioInterp {
 constexpr u64 scale_factor = 1 << 24;
 constexpr u64 scale_mask = scale_factor - 1;
 
-/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
+/// Here we step over the input in steps of rate, until we consume all of the input.
 /// Three adjacent samples are passed to fn each step.
 template <typename Function>
-static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input,
-                                      float rate_multiplier, Function fn) {
-    ASSERT(rate_multiplier > 0);
+static void StepOverSamples(State& state, StereoBuffer16& input, float rate,
+                            DSP::HLE::StereoFrame16& output, size_t& outputi, Function fn) {
+    ASSERT(rate > 0);
 
-    if (input.size() < 2)
-        return {};
+    if (input.empty())
+        return;
 
-    StereoBuffer16 output;
-    output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
+    input.insert(input.begin(), {state.xn2, state.xn1});
 
-    u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
+    const u64 step_size = static_cast<u64>(rate * scale_factor);
+    u64 fposition = state.fposition;
+    size_t inputi = 0;
 
-    u64 fposition = 0;
-    const u64 max_fposition = input.size() * scale_factor;
+    while (outputi < output.size()) {
+        inputi = static_cast<size_t>(fposition / scale_factor);
 
-    while (fposition < 1 * scale_factor) {
-        u64 fraction = fposition & scale_mask;
-
-        output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
-
-        fposition += step_size;
-    }
-
-    while (fposition < 2 * scale_factor) {
-        u64 fraction = fposition & scale_mask;
-
-        output.push_back(fn(fraction, state.xn1, input[0], input[1]));
-
-        fposition += step_size;
-    }
+        if (inputi + 2 >= input.size()) {
+            inputi = input.size() - 2;
+            break;
+        }
 
-    while (fposition < max_fposition) {
         u64 fraction = fposition & scale_mask;
-
-        size_t index = static_cast<size_t>(fposition / scale_factor);
-        output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
+        output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]);
 
         fposition += step_size;
     }
 
-    state.xn2 = input[input.size() - 2];
-    state.xn1 = input[input.size() - 1];
+    state.xn2 = input[inputi];
+    state.xn1 = input[inputi + 1];
+    state.fposition = fposition - inputi * scale_factor;
 
-    return output;
+    input.erase(input.begin(), input.begin() + inputi + 2);
 }
 
-StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
-    return StepOverSamples(
-        state, input, rate_multiplier,
+void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+          size_t& outputi) {
+    StepOverSamples(
+        state, input, rate, output, outputi,
         [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; });
 }
 
-StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
+void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+            size_t& outputi) {
     // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
-    return StepOverSamples(state, input, rate_multiplier,
-                           [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
-                               // This is a saturated subtraction. (Verified by black-box fuzzing.)
-                               s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
-                               s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
-
-                               return std::array<s16, 2>{
-                                   static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
-                                   static_cast<s16>(x0[1] + fraction * delta1 / scale_factor),
-                               };
-                           });
+    StepOverSamples(state, input, rate, output, outputi,
+                    [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
+                        // This is a saturated subtraction. (Verified by black-box fuzzing.)
+                        s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
+                        s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
+
+                        return std::array<s16, 2>{
+                            static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
+                            static_cast<s16>(x0[1] + fraction * delta1 / scale_factor),
+                        };
+                    });
 }
 
 } // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
index 19a7b66cb..59f59bc14 100644
--- a/src/audio_core/interpolate.h
+++ b/src/audio_core/interpolate.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <vector>
+#include "audio_core/hle/common.h"
 #include "common/common_types.h"
 
 namespace AudioInterp {
@@ -14,31 +15,35 @@ namespace AudioInterp {
 using StereoBuffer16 = std::vector<std::array<s16, 2>>;
 
 struct State {
-    // Two historical samples.
+    /// Two historical samples.
     std::array<s16, 2> xn1 = {}; ///< x[n-1]
     std::array<s16, 2> xn2 = {}; ///< x[n-2]
+    /// Current fractional position.
+    u64 fposition = 0;
 };
 
 /**
  * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
  * @param state Interpolation state.
  * @param input Input buffer.
- * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
- *                        rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
- *                        performs upsampling.
- * @return The resampled audio buffer.
+ * @param rate Stretch factor. Must be a positive non-zero value.
+ *             rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
+ * @param output The resampled audio buffer.
+ * @param outputi The index of output to start writing to.
  */
-StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
+void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+          size_t& outputi);
 
 /**
  * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
  * @param state Interpolation state.
  * @param input Input buffer.
- * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
- *                        rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
- *                        performs upsampling.
- * @return The resampled audio buffer.
+ * @param rate Stretch factor. Must be a positive non-zero value.
+ *             rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
+ * @param output The resampled audio buffer.
+ * @param outputi The index of output to start writing to.
  */
-StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
+void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+            size_t& outputi);
 
 } // namespace AudioInterp
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 015e69da9..c8fc7a0ff 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -8,6 +8,7 @@
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/regs_framebuffer.h"
 #include "video_core/regs_lighting.h"
 #include "video_core/regs_rasterizer.h"
@@ -594,8 +595,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
                 // Note: even if the normal vector is modified by normal map, which is not the
                 // normal of the tangent plane anymore, the half angle vector is still projected
                 // using the modified normal vector.
-                std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, "
-                                              "normal) * dot(normal, normalize(half_vector))";
+                std::string half_angle_proj =
+                    "normalize(half_vector) - normal * dot(normal, normalize(half_vector))";
                 // Note: the half angle vector projection is confirmed not normalized before the dot
                 // product. The result is in fact not cos(phi) as the name suggested.
                 index = "dot(" + half_angle_proj + ", tangent)";
@@ -1155,6 +1156,11 @@ vec4 secondary_fragment_color = vec4(0.0);
 
         // Blend the fog
         out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
+    } else if (state.fog_mode == TexturingRegs::FogMode::Gas) {
+        Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode",
+                                   true);
+        LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode");
+        UNIMPLEMENTED();
     }
 
     out += "gl_FragDepth = depth;\n";
diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp
index 39a3e396d..b38964530 100644
--- a/src/video_core/swrasterizer/lighting.cpp
+++ b/src/video_core/swrasterizer/lighting.cpp
@@ -22,18 +22,37 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut
 
 std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
-    const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view) {
+    const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+    const Math::Vec4<u8> (&texture_color)[4]) {
 
-    // TODO(Subv): Bump mapping
-    Math::Vec3<float> surface_normal = {0.0f, 0.0f, 1.0f};
+    Math::Vec3<float> surface_normal;
+    Math::Vec3<float> surface_tangent;
 
     if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) {
-        LOG_CRITICAL(HW_GPU, "unimplemented bump mapping");
-        UNIMPLEMENTED();
+        Math::Vec3<float> perturbation =
+            texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f -
+            Math::MakeVec(1.0f, 1.0f, 1.0f);
+        if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
+            if (!lighting.config0.disable_bump_renorm) {
+                const float z_square = 1 - perturbation.xy().Length2();
+                perturbation.z = std::sqrt(std::max(z_square, 0.0f));
+            }
+            surface_normal = perturbation;
+            surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
+        } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
+            surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+            surface_tangent = perturbation;
+        } else {
+            LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value());
+        }
+    } else {
+        surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+        surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
     }
 
     // Use the normalized the quaternion when performing the rotation
     auto normal = Math::QuaternionRotate(normquat, surface_normal);
+    auto tangent = Math::QuaternionRotate(normquat, surface_tangent);
 
     Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f};
     Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f};
@@ -102,6 +121,16 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
                 result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f);
                 break;
             }
+            case LightingRegs::LightingLutInput::CP:
+                if (lighting.config0.config == LightingRegs::LightingConfig::Config7) {
+                    const Math::Vec3<float> norm_half_vector = half_vector.Normalized();
+                    const Math::Vec3<float> half_vector_proj =
+                        norm_half_vector - normal * Math::Dot(normal, norm_half_vector);
+                    result = Math::Dot(half_vector_proj, tangent);
+                } else {
+                    result = 0.0f;
+                }
+                break;
             default:
                 LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input));
                 UNIMPLEMENTED();
diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h
index 438dca926..d807a3d94 100644
--- a/src/video_core/swrasterizer/lighting.h
+++ b/src/video_core/swrasterizer/lighting.h
@@ -13,6 +13,7 @@ namespace Pica {
 
 std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
     const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
-    const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view);
+    const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+    const Math::Vec4<u8> (&texture_color)[4]);
 
 } // namespace Pica
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index fdc1df199..862135614 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -437,8 +437,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                     GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
                     GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
                 };
-                std::tie(primary_fragment_color, secondary_fragment_color) =
-                    ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view);
+                std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
+                    g_state.regs.lighting, g_state.lighting, normquat, view, texture_color);
             }
 
             for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();