5 files changed, 120 insertions, 15 deletions
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 5d609da06..3a09d62f4 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -68,6 +68,8 @@ static void InitScreenCoordinates(OutputVertex& vtx)
 
     float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
     vtx.color *= inv_w;
+    vtx.view *= inv_w;
+    vtx.quat *= inv_w;
     vtx.tc0 *= inv_w;
     vtx.tc1 *= inv_w;
     vtx.tc2 *= inv_w;
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b09484de4..178a4b83f 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -241,7 +241,8 @@ struct Regs {
     TextureConfig texture0;
     INSERT_PADDING_WORDS(0x8);
     BitField<0, 4, TextureFormat> texture0_format;
-    INSERT_PADDING_WORDS(0x2);
+    BitField<0, 1, u32> fragment_lighting_enable;
+    INSERT_PADDING_WORDS(0x1);
     TextureConfig texture1;
     BitField<0, 4, TextureFormat> texture1_format;
     INSERT_PADDING_WORDS(0x2);
@@ -645,6 +646,22 @@ struct Regs {
 
     INSERT_PADDING_WORDS(0x20);
 
+    enum class LightingSampler { // Identifiers for the lighting samplers; numbering is sparse (2, 7 and 9-15 are not assigned here)
+        Distribution0 = 0,
+        Distribution1 = 1,
+        Fresnel = 3,
+        Blue = 4,
+        Green = 5,
+        Red = 6,
+        SpotlightAttenuation = 8,
+        DistanceAttenuation = 16, // NOTE(review): presumably a base index with per-light entries following - confirm
+    };
+
+    enum class LightingLutInput { // Presumably selects the quantity used to index a lighting LUT; only values 0 and 3 are named here
+        NH = 0, // Cosine of the angle between the normal and half-angle vectors
+        LN = 3, // Cosine of the angle between the light and the normal vectors
+    };
+
     struct {
         union LightColor {
             BitField< 0, 10, u32> b;
@@ -664,17 +681,21 @@ struct Regs {
 
             struct {
                 // Encoded as 16-bit floating point
-                u16 x;
-                u16 y;
-                u16 z;
-                u16 unk;
+                union {
+                    BitField< 0, 16, u32> x;
+                    BitField<16, 16, u32> y;
+                };
+                union {
+                    BitField< 0, 16, u32> z;
+                };
 
                 INSERT_PADDING_WORDS(0x3);
 
-                // 1.f if 0, otherwise 0.f
-                BitField<0, 1, u32> w;
-            } position;
-
+                union {
+                    BitField<0, 1, u32> w; // 1.f if 0, otherwise 0.f
+                    BitField<1, 1, u32> two_sided_diffuse; // when disabled, clamp dot-product to 0
+                };
+            };
 
             BitField<0, 20, u32> dist_atten_bias;
             BitField<0, 20, u32> dist_atten_scale;
@@ -722,7 +743,27 @@ struct Regs {
         // registers is written to, the behavior will be the same.
         u32 lut_data[8];
 
-        INSERT_PADDING_WORDS(0x9);
+        union { // One single-bit flag per entry (d0, d1, sp, fr, rb, rg, rr), at a 4-bit stride starting at bit 1
+            BitField< 1, 1, u32> d0;
+            BitField< 5, 1, u32> d1;
+            BitField< 9, 1, u32> sp;
+            BitField<13, 1, u32> fr;
+            BitField<17, 1, u32> rb;
+            BitField<21, 1, u32> rg;
+            BitField<25, 1, u32> rr;
+        } abs_lut_input; // NOTE(review): presumably controls taking the absolute value of each LUT input - confirm meaning/polarity
+
+        union { // One 3-bit field per entry (d0, d1, sp, fr, rb, rg, rr), at a 4-bit stride starting at bit 0
+            BitField< 0, 3, u32> d0;
+            BitField< 4, 3, u32> d1;
+            BitField< 8, 3, u32> sp;
+            BitField<12, 3, u32> fr;
+            BitField<16, 3, u32> rb;
+            BitField<20, 3, u32> rg;
+            BitField<24, 3, u32> rr;
+        } lut_input; // NOTE(review): fields presumably hold LightingLutInput values - TODO confirm
+
+        INSERT_PADDING_WORDS(0x7);
 
         union {
             // There are 8 light enable "slots", corresponding to the total number of lights
@@ -1095,6 +1136,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68);
 ASSERT_REG_POSITION(texture0_enable, 0x80);
 ASSERT_REG_POSITION(texture0, 0x81);
 ASSERT_REG_POSITION(texture0_format, 0x8e);
+ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
 ASSERT_REG_POSITION(texture1, 0x91);
 ASSERT_REG_POSITION(texture1_format, 0x96);
 ASSERT_REG_POSITION(texture2, 0x99);
@@ -1109,6 +1151,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8);
 ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
 ASSERT_REG_POSITION(output_merger, 0x100);
 ASSERT_REG_POSITION(framebuffer, 0x110);
+ASSERT_REG_POSITION(lighting, 0x140);
 ASSERT_REG_POSITION(vertex_attributes, 0x200);
 ASSERT_REG_POSITION(index_array, 0x227);
 ASSERT_REG_POSITION(num_vertices, 0x228);
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index de798aa81..a34421c5d 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -121,4 +121,60 @@ private:
 
 static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
 
+struct float16 {
+    // Decodes a raw 16-bit float: 10 bit mantissa, 5 bit exponent (bias 15), 1 bit sign; upper bits of hex are ignored.
+    // Zero is detected with the sign bit masked off, so 0x8000 yields -0.0f rather than -2^-15. TODO: denormal/Inf/NaN encodings untested
+    static float16 FromRawFloat16(u32 hex) {
+        float16 ret;
+        const u32 sign = (hex >> 15) & 1;
+        if ((hex & 0x7FFF) == 0) {
+            ret.value = sign ? -0.0f : 0.0f;
+        } else {
+            const u32 mantissa = hex & 0x3FF;
+            const u32 exponent = (hex >> 10) & 0x1F;
+            ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f));
+            if (sign)
+                ret.value = -ret.value;
+        }
+        return ret;
+    }
+
+    float ToFloat32() const {
+        return value;
+    }
+
+private:
+    // Stored as a regular float, merely for convenience
+    // TODO: Perform proper arithmetic on this!
+    float value;
+};
+
+struct float20 {
+    // Decodes a raw 20-bit float: 12 bit mantissa, 7 bit exponent (bias 63), 1 bit sign; upper bits of hex are ignored.
+    // Zero is detected with the sign bit masked off, so 0x80000 yields -0.0f rather than -2^-63. TODO: denormal/Inf/NaN encodings untested
+    static float20 FromRawFloat20(u32 hex) {
+        float20 ret;
+        const u32 sign = (hex >> 19) & 1;
+        if ((hex & 0x7FFFF) == 0) {
+            ret.value = sign ? -0.0f : 0.0f;
+        } else {
+            const u32 mantissa = hex & 0xFFF;
+            const u32 exponent = (hex >> 12) & 0x7F;
+            ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f));
+            if (sign)
+                ret.value = -ret.value;
+        }
+        return ret;
+    }
+
+    float ToFloat32() const {
+        return value;
+    }
+
+private:
+    // Stored as a regular float, merely for convenience
+    // TODO: Perform proper arithmetic on this!
+    float value;
+};
+
 } // namespace Pica
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 59f54236b..44c234ed8 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
     }
 
-    LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
+    LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+        "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
         ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
-        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
+        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
+        ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
 
     return ret;
 }
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1c6fa592c..f068cd93f 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -37,17 +37,19 @@ struct OutputVertex {
     Math::Vec4<float24> color;
     Math::Vec2<float24> tc0;
     Math::Vec2<float24> tc1;
-    float24 pad[6];
+    INSERT_PADDING_WORDS(2);
+    Math::Vec3<float24> view;
+    INSERT_PADDING_WORDS(1);
     Math::Vec2<float24> tc2;
 
     // Padding for optimal alignment
-    float24 pad2[4];
+    INSERT_PADDING_WORDS(4);
 
     // Attributes used to store intermediate results
 
     // position after perspective divide
     Math::Vec3<float24> screenpos;
-    float24 pad3;
+    INSERT_PADDING_WORDS(1);
 
     // Linear interpolation
     // factor: 0=this, 1=vtx