diff options
37 files changed, 740 insertions, 264 deletions
| diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp index 5005ba519..a58f24169 100644 --- a/src/audio_core/algorithm/interpolate.cpp +++ b/src/audio_core/algorithm/interpolate.cpp @@ -5,6 +5,7 @@  #define _USE_MATH_DEFINES  #include <algorithm> +#include <climits>  #include <cmath>  #include <vector>  #include "audio_core/algorithm/interpolate.h" @@ -13,13 +14,131 @@  namespace AudioCore { -/// The Lanczos kernel -static double Lanczos(std::size_t a, double x) { -    if (x == 0.0) -        return 1.0; -    const double px = M_PI * x; -    return a * std::sin(px) * std::sin(px / a) / (px * px); -} +constexpr std::array<s16, 512> curve_lut0 = { +    6600,  19426, 6722,  3,     6479,  19424, 6845,  9,     6359,  19419, 6968,  15,    6239, +    19412, 7093,  22,    6121,  19403, 7219,  28,    6004,  19391, 7345,  34,    5888,  19377, +    7472,  41,    5773,  19361, 7600,  48,    5659,  19342, 7728,  55,    5546,  19321, 7857, +    62,    5434,  19298, 7987,  69,    5323,  19273, 8118,  77,    5213,  19245, 8249,  84, +    5104,  19215, 8381,  92,    4997,  19183, 8513,  101,   4890,  19148, 8646,  109,   4785, +    19112, 8780,  118,   4681,  19073, 8914,  127,   4579,  19031, 9048,  137,   4477,  18988, +    9183,  147,   4377,  18942, 9318,  157,   4277,  18895, 9454,  168,   4179,  18845, 9590, +    179,   4083,  18793, 9726,  190,   3987,  18738, 9863,  202,   3893,  18682, 10000, 215, +    3800,  18624, 10137, 228,   3709,  18563, 10274, 241,   3618,  18500, 10411, 255,   3529, +    18436, 10549, 270,   3441,  18369, 10687, 285,   3355,  18300, 10824, 300,   3269,  18230, +    10962, 317,   3186,  18157, 11100, 334,   3103,  18082, 11238, 351,   3022,  18006, 11375, +    369,   2942,  17927, 11513, 388,   2863,  17847, 11650, 408,   2785,  17765, 11788, 428, +    2709,  17681, 11925, 449,   2635,  17595, 12062, 471,   2561,  17507, 12198, 494,   2489, +    17418, 12334, 517,   2418,  17327, 12470, 541,   2348,  
17234, 12606, 566,   2280,  17140, +    12741, 592,   2213,  17044, 12876, 619,   2147,  16946, 13010, 647,   2083,  16846, 13144, +    675,   2020,  16745, 13277, 704,   1958,  16643, 13409, 735,   1897,  16539, 13541, 766, +    1838,  16434, 13673, 798,   1780,  16327, 13803, 832,   1723,  16218, 13933, 866,   1667, +    16109, 14062, 901,   1613,  15998, 14191, 937,   1560,  15885, 14318, 975,   1508,  15772, +    14445, 1013,  1457,  15657, 14571, 1052,  1407,  15540, 14695, 1093,  1359,  15423, 14819, +    1134,  1312,  15304, 14942, 1177,  1266,  15185, 15064, 1221,  1221,  15064, 15185, 1266, +    1177,  14942, 15304, 1312,  1134,  14819, 15423, 1359,  1093,  14695, 15540, 1407,  1052, +    14571, 15657, 1457,  1013,  14445, 15772, 1508,  975,   14318, 15885, 1560,  937,   14191, +    15998, 1613,  901,   14062, 16109, 1667,  866,   13933, 16218, 1723,  832,   13803, 16327, +    1780,  798,   13673, 16434, 1838,  766,   13541, 16539, 1897,  735,   13409, 16643, 1958, +    704,   13277, 16745, 2020,  675,   13144, 16846, 2083,  647,   13010, 16946, 2147,  619, +    12876, 17044, 2213,  592,   12741, 17140, 2280,  566,   12606, 17234, 2348,  541,   12470, +    17327, 2418,  517,   12334, 17418, 2489,  494,   12198, 17507, 2561,  471,   12062, 17595, +    2635,  449,   11925, 17681, 2709,  428,   11788, 17765, 2785,  408,   11650, 17847, 2863, +    388,   11513, 17927, 2942,  369,   11375, 18006, 3022,  351,   11238, 18082, 3103,  334, +    11100, 18157, 3186,  317,   10962, 18230, 3269,  300,   10824, 18300, 3355,  285,   10687, +    18369, 3441,  270,   10549, 18436, 3529,  255,   10411, 18500, 3618,  241,   10274, 18563, +    3709,  228,   10137, 18624, 3800,  215,   10000, 18682, 3893,  202,   9863,  18738, 3987, +    190,   9726,  18793, 4083,  179,   9590,  18845, 4179,  168,   9454,  18895, 4277,  157, +    9318,  18942, 4377,  147,   9183,  18988, 4477,  137,   9048,  19031, 4579,  127,   8914, +    19073, 4681,  118,   8780,  19112, 4785,  109,   8646, 
 19148, 4890,  101,   8513,  19183, +    4997,  92,    8381,  19215, 5104,  84,    8249,  19245, 5213,  77,    8118,  19273, 5323, +    69,    7987,  19298, 5434,  62,    7857,  19321, 5546,  55,    7728,  19342, 5659,  48, +    7600,  19361, 5773,  41,    7472,  19377, 5888,  34,    7345,  19391, 6004,  28,    7219, +    19403, 6121,  22,    7093,  19412, 6239,  15,    6968,  19419, 6359,  9,     6845,  19424, +    6479,  3,     6722,  19426, 6600}; + +constexpr std::array<s16, 512> curve_lut1 = { +    -68,   32639, 69,    -5,    -200,  32630, 212,   -15,   -328,  32613, 359,   -26,   -450, +    32586, 512,   -36,   -568,  32551, 669,   -47,   -680,  32507, 832,   -58,   -788,  32454, +    1000,  -69,   -891,  32393, 1174,  -80,   -990,  32323, 1352,  -92,   -1084, 32244, 1536, +    -103,  -1173, 32157, 1724,  -115,  -1258, 32061, 1919,  -128,  -1338, 31956, 2118,  -140, +    -1414, 31844, 2322,  -153,  -1486, 31723, 2532,  -167,  -1554, 31593, 2747,  -180,  -1617, +    31456, 2967,  -194,  -1676, 31310, 3192,  -209,  -1732, 31157, 3422,  -224,  -1783, 30995, +    3657,  -240,  -1830, 30826, 3897,  -256,  -1874, 30649, 4143,  -272,  -1914, 30464, 4393, +    -289,  -1951, 30272, 4648,  -307,  -1984, 30072, 4908,  -325,  -2014, 29866, 5172,  -343, +    -2040, 29652, 5442,  -362,  -2063, 29431, 5716,  -382,  -2083, 29203, 5994,  -403,  -2100, +    28968, 6277,  -424,  -2114, 28727, 6565,  -445,  -2125, 28480, 6857,  -468,  -2133, 28226, +    7153,  -490,  -2139, 27966, 7453,  -514,  -2142, 27700, 7758,  -538,  -2142, 27428, 8066, +    -563,  -2141, 27151, 8378,  -588,  -2136, 26867, 8694,  -614,  -2130, 26579, 9013,  -641, +    -2121, 26285, 9336,  -668,  -2111, 25987, 9663,  -696,  -2098, 25683, 9993,  -724,  -2084, +    25375, 10326, -753,  -2067, 25063, 10662, -783,  -2049, 24746, 11000, -813,  -2030, 24425, +    11342, -844,  -2009, 24100, 11686, -875,  -1986, 23771, 12033, -907,  -1962, 23438, 12382, +    -939,  -1937, 23103, 12733, -972,  -1911, 22764, 13086, 
-1005, -1883, 22422, 13441, -1039, +    -1855, 22077, 13798, -1072, -1825, 21729, 14156, -1107, -1795, 21380, 14516, -1141, -1764, +    21027, 14877, -1176, -1732, 20673, 15239, -1211, -1700, 20317, 15602, -1246, -1667, 19959, +    15965, -1282, -1633, 19600, 16329, -1317, -1599, 19239, 16694, -1353, -1564, 18878, 17058, +    -1388, -1530, 18515, 17423, -1424, -1495, 18151, 17787, -1459, -1459, 17787, 18151, -1495, +    -1424, 17423, 18515, -1530, -1388, 17058, 18878, -1564, -1353, 16694, 19239, -1599, -1317, +    16329, 19600, -1633, -1282, 15965, 19959, -1667, -1246, 15602, 20317, -1700, -1211, 15239, +    20673, -1732, -1176, 14877, 21027, -1764, -1141, 14516, 21380, -1795, -1107, 14156, 21729, +    -1825, -1072, 13798, 22077, -1855, -1039, 13441, 22422, -1883, -1005, 13086, 22764, -1911, +    -972,  12733, 23103, -1937, -939,  12382, 23438, -1962, -907,  12033, 23771, -1986, -875, +    11686, 24100, -2009, -844,  11342, 24425, -2030, -813,  11000, 24746, -2049, -783,  10662, +    25063, -2067, -753,  10326, 25375, -2084, -724,  9993,  25683, -2098, -696,  9663,  25987, +    -2111, -668,  9336,  26285, -2121, -641,  9013,  26579, -2130, -614,  8694,  26867, -2136, +    -588,  8378,  27151, -2141, -563,  8066,  27428, -2142, -538,  7758,  27700, -2142, -514, +    7453,  27966, -2139, -490,  7153,  28226, -2133, -468,  6857,  28480, -2125, -445,  6565, +    28727, -2114, -424,  6277,  28968, -2100, -403,  5994,  29203, -2083, -382,  5716,  29431, +    -2063, -362,  5442,  29652, -2040, -343,  5172,  29866, -2014, -325,  4908,  30072, -1984, +    -307,  4648,  30272, -1951, -289,  4393,  30464, -1914, -272,  4143,  30649, -1874, -256, +    3897,  30826, -1830, -240,  3657,  30995, -1783, -224,  3422,  31157, -1732, -209,  3192, +    31310, -1676, -194,  2967,  31456, -1617, -180,  2747,  31593, -1554, -167,  2532,  31723, +    -1486, -153,  2322,  31844, -1414, -140,  2118,  31956, -1338, -128,  1919,  32061, -1258, +    -115,  1724,  32157, -1173, -103,  1536,  
32244, -1084, -92,   1352,  32323, -990,  -80, +    1174,  32393, -891,  -69,   1000,  32454, -788,  -58,   832,   32507, -680,  -47,   669, +    32551, -568,  -36,   512,   32586, -450,  -26,   359,   32613, -328,  -15,   212,   32630, +    -200,  -5,    69,    32639, -68}; + +constexpr std::array<s16, 512> curve_lut2 = { +    3195,  26287, 3329,  -32,   3064,  26281, 3467,  -34,   2936,  26270, 3608,  -38,   2811, +    26253, 3751,  -42,   2688,  26230, 3897,  -46,   2568,  26202, 4046,  -50,   2451,  26169, +    4199,  -54,   2338,  26130, 4354,  -58,   2227,  26085, 4512,  -63,   2120,  26035, 4673, +    -67,   2015,  25980, 4837,  -72,   1912,  25919, 5004,  -76,   1813,  25852, 5174,  -81, +    1716,  25780, 5347,  -87,   1622,  25704, 5522,  -92,   1531,  25621, 5701,  -98,   1442, +    25533, 5882,  -103,  1357,  25440, 6066,  -109,  1274,  25342, 6253,  -115,  1193,  25239, +    6442,  -121,  1115,  25131, 6635,  -127,  1040,  25018, 6830,  -133,  967,   24899, 7027, +    -140,  897,   24776, 7227,  -146,  829,   24648, 7430,  -153,  764,   24516, 7635,  -159, +    701,   24379, 7842,  -166,  641,   24237, 8052,  -174,  583,   24091, 8264,  -181,  526, +    23940, 8478,  -187,  472,   23785, 8695,  -194,  420,   23626, 8914,  -202,  371,   23462, +    9135,  -209,  324,   23295, 9358,  -215,  279,   23123, 9583,  -222,  236,   22948, 9809, +    -230,  194,   22769, 10038, -237,  154,   22586, 10269, -243,  117,   22399, 10501, -250, +    81,    22208, 10735, -258,  47,    22015, 10970, -265,  15,    21818, 11206, -271,  -16, +    21618, 11444, -277,  -44,   21415, 11684, -283,  -71,   21208, 11924, -290,  -97,   20999, +    12166, -296,  -121,  20786, 12409, -302,  -143,  20571, 12653, -306,  -163,  20354, 12898, +    -311,  -183,  20134, 13143, -316,  -201,  19911, 13389, -321,  -218,  19686, 13635, -325, +    -234,  19459, 13882, -328,  -248,  19230, 14130, -332,  -261,  18998, 14377, -335,  -273, +    18765, 14625, -337,  -284,  18531, 14873, -339,  
-294,  18295, 15121, -341,  -302,  18057, +    15369, -341,  -310,  17817, 15617, -341,  -317,  17577, 15864, -340,  -323,  17335, 16111, +    -340,  -328,  17092, 16357, -338,  -332,  16848, 16603, -336,  -336,  16603, 16848, -332, +    -338,  16357, 17092, -328,  -340,  16111, 17335, -323,  -340,  15864, 17577, -317,  -341, +    15617, 17817, -310,  -341,  15369, 18057, -302,  -341,  15121, 18295, -294,  -339,  14873, +    18531, -284,  -337,  14625, 18765, -273,  -335,  14377, 18998, -261,  -332,  14130, 19230, +    -248,  -328,  13882, 19459, -234,  -325,  13635, 19686, -218,  -321,  13389, 19911, -201, +    -316,  13143, 20134, -183,  -311,  12898, 20354, -163,  -306,  12653, 20571, -143,  -302, +    12409, 20786, -121,  -296,  12166, 20999, -97,   -290,  11924, 21208, -71,   -283,  11684, +    21415, -44,   -277,  11444, 21618, -16,   -271,  11206, 21818, 15,    -265,  10970, 22015, +    47,    -258,  10735, 22208, 81,    -250,  10501, 22399, 117,   -243,  10269, 22586, 154, +    -237,  10038, 22769, 194,   -230,  9809,  22948, 236,   -222,  9583,  23123, 279,   -215, +    9358,  23295, 324,   -209,  9135,  23462, 371,   -202,  8914,  23626, 420,   -194,  8695, +    23785, 472,   -187,  8478,  23940, 526,   -181,  8264,  24091, 583,   -174,  8052,  24237, +    641,   -166,  7842,  24379, 701,   -159,  7635,  24516, 764,   -153,  7430,  24648, 829, +    -146,  7227,  24776, 897,   -140,  7027,  24899, 967,   -133,  6830,  25018, 1040,  -127, +    6635,  25131, 1115,  -121,  6442,  25239, 1193,  -115,  6253,  25342, 1274,  -109,  6066, +    25440, 1357,  -103,  5882,  25533, 1442,  -98,   5701,  25621, 1531,  -92,   5522,  25704, +    1622,  -87,   5347,  25780, 1716,  -81,   5174,  25852, 1813,  -76,   5004,  25919, 1912, +    -72,   4837,  25980, 2015,  -67,   4673,  26035, 2120,  -63,   4512,  26085, 2227,  -58, +    4354,  26130, 2338,  -54,   4199,  26169, 2451,  -50,   4046,  26202, 2568,  -46,   3897, +    26230, 2688,  -42,   3751,  26253, 2811,  -38,   
3608,  26270, 2936,  -34,   3467,  26281, +    3064,  -32,   3329,  26287, 3195};  std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) {      if (input.size() < 2) @@ -30,40 +149,39 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,          ratio = 1.0;      } -    if (ratio != state.current_ratio) { -        const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio); -        state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3); -        state.current_ratio = ratio; -    } -    state.nyquist.Process(input); - -    constexpr std::size_t taps = InterpolationState::lanczos_taps; -    const std::size_t num_frames = input.size() / 2; - -    std::vector<s16> output; -    output.reserve(static_cast<std::size_t>(input.size() / ratio + 4)); - -    double& pos = state.position; -    auto& h = state.history; -    for (std::size_t i = 0; i < num_frames; ++i) { -        std::rotate(h.begin(), h.end() - 1, h.end()); -        h[0][0] = input[i * 2 + 0]; -        h[0][1] = input[i * 2 + 1]; - -        while (pos <= 1.0) { -            double l = 0.0; -            double r = 0.0; -            for (std::size_t j = 0; j < h.size(); j++) { -                const double lanczos_calc = Lanczos(taps, pos + j - taps + 1); -                l += lanczos_calc * h[j][0]; -                r += lanczos_calc * h[j][1]; -            } -            output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0))); -            output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0))); - -            pos += ratio; +    const int step = static_cast<int>(ratio * 0x8000); +    const std::array<s16, 512>& lut = [step] { +        if (step > 0xaaaa) { +            return curve_lut0; +        } +        if (step <= 0x8000) { +            return curve_lut1;          } -        pos -= 1.0; +        return curve_lut2; +    }(); + +    std::vector<s16> 
output(static_cast<std::size_t>(input.size() / ratio)); +    int in_offset = 0; +    for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { +        const int lut_index = (state.fraction >> 8) * 4; + +        const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + +                      input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + +                      input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] + +                      input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3]; + +        const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + +                      input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + +                      input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + +                      input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3]; + +        const int new_offset = state.fraction + step; + +        in_offset += new_offset >> 15; +        state.fraction = new_offset & 0x7fff; + +        output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); +        output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX));      }      return output; diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h index edbd6460f..1b9831a75 100644 --- a/src/audio_core/algorithm/interpolate.h +++ b/src/audio_core/algorithm/interpolate.h @@ -6,19 +6,12 @@  #include <array>  #include <vector> -#include "audio_core/algorithm/filter.h"  #include "common/common_types.h"  namespace AudioCore {  struct InterpolationState { -    static constexpr std::size_t lanczos_taps = 4; -    static constexpr std::size_t history_size = lanczos_taps * 2 - 1; - -    double current_ratio = 0.0; -    CascadingFilter nyquist; -    std::array<std::array<s16, 2>, history_size> history = {}; -    double position = 0; +    int fraction = 0;  };  /// Interpolates input signal to produce output signal. 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 26612e692..88c06b2ce 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -187,6 +187,8 @@ add_library(core STATIC      hle/kernel/synchronization.h      hle/kernel/thread.cpp      hle/kernel/thread.h +    hle/kernel/time_manager.cpp +    hle/kernel/time_manager.h      hle/kernel/transfer_memory.cpp      hle/kernel/transfer_memory.h      hle/kernel/vm_manager.cpp diff --git a/src/core/core.cpp b/src/core/core.cpp index 0eb0c0dca..86e314c94 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -707,4 +707,12 @@ const Service::SM::ServiceManager& System::ServiceManager() const {      return *impl->service_manager;  } +void System::RegisterCoreThread(std::size_t id) { +    impl->kernel.RegisterCoreThread(id); +} + +void System::RegisterHostThread() { +    impl->kernel.RegisterHostThread(); +} +  } // namespace Core diff --git a/src/core/core.h b/src/core/core.h index e69d68fcf..8d862a8e6 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -360,6 +360,12 @@ public:      const CurrentBuildProcessID& GetCurrentProcessBuildID() const; +    /// Register a host thread as an emulated CPU Core. +    void RegisterCoreThread(std::size_t id); + +    /// Register a host thread as an auxiliary thread. 
+    void RegisterHostThread(); +  private:      System(); diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h index 213461b6a..b04e046ed 100644 --- a/src/core/hardware_properties.h +++ b/src/core/hardware_properties.h @@ -20,6 +20,8 @@ constexpr u32 NUM_CPU_CORES = 4;            // Number of CPU Cores  } // namespace Hardware +constexpr u32 INVALID_HOST_THREAD_ID = 0xFFFFFFFF; +  struct EmuThreadHandle {      u32 host_handle;      u32 guest_handle; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4eb1d8703..9232f4d7e 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -3,9 +3,12 @@  // Refer to the license.txt file included.  #include <atomic> +#include <bitset>  #include <functional>  #include <memory>  #include <mutex> +#include <thread> +#include <unordered_map>  #include <utility>  #include "common/assert.h" @@ -15,6 +18,7 @@  #include "core/core.h"  #include "core/core_timing.h"  #include "core/core_timing_util.h" +#include "core/hardware_properties.h"  #include "core/hle/kernel/client_port.h"  #include "core/hle/kernel/errors.h"  #include "core/hle/kernel/handle_table.h" @@ -25,6 +29,7 @@  #include "core/hle/kernel/scheduler.h"  #include "core/hle/kernel/synchronization.h"  #include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h"  #include "core/hle/lock.h"  #include "core/hle/result.h"  #include "core/memory.h" @@ -44,7 +49,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_      std::lock_guard lock{HLE::g_hle_lock};      std::shared_ptr<Thread> thread = -        system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle); +        system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);      if (thread == nullptr) {          LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);          return; @@ -97,8 +102,8 @@ static void ThreadWakeupCallback(u64 
thread_handle, [[maybe_unused]] s64 cycles_  }  struct KernelCore::Impl { -    explicit Impl(Core::System& system) -        : system{system}, global_scheduler{system}, synchronization{system} {} +    explicit Impl(Core::System& system, KernelCore& kernel) +        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}      void Initialize(KernelCore& kernel) {          Shutdown(); @@ -120,7 +125,7 @@ struct KernelCore::Impl {          system_resource_limit = nullptr; -        thread_wakeup_callback_handle_table.Clear(); +        global_handle_table.Clear();          thread_wakeup_event_type = nullptr;          preemption_event = nullptr; @@ -138,8 +143,8 @@ struct KernelCore::Impl {      void InitializePhysicalCores() {          exclusive_monitor = -            Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount()); -        for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) { +            Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES); +        for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {              cores.emplace_back(system, i, *exclusive_monitor);          }      } @@ -184,6 +189,50 @@ struct KernelCore::Impl {          system.Memory().SetCurrentPageTable(*process);      } +    void RegisterCoreThread(std::size_t core_id) { +        std::unique_lock lock{register_thread_mutex}; +        const std::thread::id this_id = std::this_thread::get_id(); +        const auto it = host_thread_ids.find(this_id); +        ASSERT(core_id < Core::Hardware::NUM_CPU_CORES); +        ASSERT(it == host_thread_ids.end()); +        ASSERT(!registered_core_threads[core_id]); +        host_thread_ids[this_id] = static_cast<u32>(core_id); +        registered_core_threads.set(core_id); +    } + +    void RegisterHostThread() { +        std::unique_lock lock{register_thread_mutex}; +        const std::thread::id this_id = std::this_thread::get_id(); +        
const auto it = host_thread_ids.find(this_id); +        ASSERT(it == host_thread_ids.end()); +        host_thread_ids[this_id] = registered_thread_ids++; +    } + +    u32 GetCurrentHostThreadID() const { +        const std::thread::id this_id = std::this_thread::get_id(); +        const auto it = host_thread_ids.find(this_id); +        if (it == host_thread_ids.end()) { +            return Core::INVALID_HOST_THREAD_ID; +        } +        return it->second; +    } + +    Core::EmuThreadHandle GetCurrentEmuThreadID() const { +        Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); +        result.host_handle = GetCurrentHostThreadID(); +        if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { +            return result; +        } +        const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler(); +        const Kernel::Thread* current = sched.GetCurrentThread(); +        if (current != nullptr) { +            result.guest_handle = current->GetGlobalHandle(); +        } else { +            result.guest_handle = InvalidHandle; +        } +        return result; +    } +      std::atomic<u32> next_object_id{0};      std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin};      std::atomic<u64> next_user_process_id{Process::ProcessIDMin}; @@ -194,15 +243,16 @@ struct KernelCore::Impl {      Process* current_process = nullptr;      Kernel::GlobalScheduler global_scheduler;      Kernel::Synchronization synchronization; +    Kernel::TimeManager time_manager;      std::shared_ptr<ResourceLimit> system_resource_limit;      std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;      std::shared_ptr<Core::Timing::EventType> preemption_event; -    // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, -    // allowing us to simply use a pool index or similar. 
-    Kernel::HandleTable thread_wakeup_callback_handle_table; +    // This is the kernel's handle table or supervisor handle table which +    // stores all the objects in place. +    Kernel::HandleTable global_handle_table;      /// Map of named ports managed by the kernel, which can be retrieved using      /// the ConnectToPort SVC. @@ -211,11 +261,17 @@ struct KernelCore::Impl {      std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;      std::vector<Kernel::PhysicalCore> cores; +    // 0-3 IDs represent core threads, >3 represent others +    std::unordered_map<std::thread::id, u32> host_thread_ids; +    u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; +    std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads; +    std::mutex register_thread_mutex; +      // System context      Core::System& system;  }; -KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {} +KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system, *this)} {}  KernelCore::~KernelCore() {      Shutdown();  } @@ -232,9 +288,8 @@ std::shared_ptr<ResourceLimit> KernelCore::GetSystemResourceLimit() const {      return impl->system_resource_limit;  } -std::shared_ptr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable( -    Handle handle) const { -    return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); +std::shared_ptr<Thread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const { +    return impl->global_handle_table.Get<Thread>(handle);  }  void KernelCore::AppendNewProcess(std::shared_ptr<Process> process) { @@ -265,6 +320,14 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {      return impl->global_scheduler;  } +Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) { +    return impl->cores[id].Scheduler(); +} + +const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const { +    return impl->cores[id].Scheduler(); +} +  
Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {      return impl->cores[id];  } @@ -281,6 +344,14 @@ const Kernel::Synchronization& KernelCore::Synchronization() const {      return impl->synchronization;  } +Kernel::TimeManager& KernelCore::TimeManager() { +    return impl->time_manager; +} + +const Kernel::TimeManager& KernelCore::TimeManager() const { +    return impl->time_manager; +} +  Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {      return *impl->exclusive_monitor;  } @@ -338,12 +409,28 @@ const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallback      return impl->thread_wakeup_event_type;  } -Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { -    return impl->thread_wakeup_callback_handle_table; +Kernel::HandleTable& KernelCore::GlobalHandleTable() { +    return impl->global_handle_table; +} + +const Kernel::HandleTable& KernelCore::GlobalHandleTable() const { +    return impl->global_handle_table; +} + +void KernelCore::RegisterCoreThread(std::size_t core_id) { +    impl->RegisterCoreThread(core_id); +} + +void KernelCore::RegisterHostThread() { +    impl->RegisterHostThread(); +} + +u32 KernelCore::GetCurrentHostThreadID() const { +    return impl->GetCurrentHostThreadID();  } -const Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() const { -    return impl->thread_wakeup_callback_handle_table; +Core::EmuThreadHandle KernelCore::GetCurrentEmuThreadID() const { +    return impl->GetCurrentEmuThreadID();  }  } // namespace Kernel diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 1eede3063..c4f78ab71 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -11,6 +11,7 @@  #include "core/hle/kernel/object.h"  namespace Core { +struct EmuThreadHandle;  class ExclusiveMonitor;  class System;  } // namespace Core @@ -29,8 +30,10 @@ class HandleTable;  class PhysicalCore;  class Process;  class ResourceLimit; +class 
Scheduler;  class Synchronization;  class Thread; +class TimeManager;  /// Represents a single instance of the kernel.  class KernelCore { @@ -64,7 +67,7 @@ public:      std::shared_ptr<ResourceLimit> GetSystemResourceLimit() const;      /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. -    std::shared_ptr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; +    std::shared_ptr<Thread> RetrieveThreadFromGlobalHandleTable(Handle handle) const;      /// Adds the given shared pointer to an internal list of active processes.      void AppendNewProcess(std::shared_ptr<Process> process); @@ -87,6 +90,12 @@ public:      /// Gets the sole instance of the global scheduler      const Kernel::GlobalScheduler& GlobalScheduler() const; +    /// Gets the sole instance of the Scheduler associated with cpu core 'id' +    Kernel::Scheduler& Scheduler(std::size_t id); + +    /// Gets the sole instance of the Scheduler associated with cpu core 'id' +    const Kernel::Scheduler& Scheduler(std::size_t id) const; +      /// Gets the an instance of the respective physical CPU core.      Kernel::PhysicalCore& PhysicalCore(std::size_t id); @@ -99,6 +108,12 @@ public:      /// Gets the an instance of the Synchronization Interface.      const Kernel::Synchronization& Synchronization() const; +    /// Gets an instance of the TimeManager Interface. +    Kernel::TimeManager& TimeManager(); + +    /// Gets an instance of the TimeManager Interface. +    const Kernel::TimeManager& TimeManager() const; +      /// Stops execution of 'id' core, in order to reschedule a new thread.      void PrepareReschedule(std::size_t id); @@ -120,6 +135,18 @@ public:      /// Determines whether or not the given port is a valid named port.      bool IsValidNamedPort(NamedPortTable::const_iterator port) const; +    /// Gets the current host_thread/guest_thread handle. 
+    Core::EmuThreadHandle GetCurrentEmuThreadID() const; + +    /// Gets the current host_thread handle. +    u32 GetCurrentHostThreadID() const; + +    /// Register the current thread as a CPU Core Thread. +    void RegisterCoreThread(std::size_t core_id); + +    /// Register the current thread as a non CPU core thread. +    void RegisterHostThread(); +  private:      friend class Object;      friend class Process; @@ -140,11 +167,11 @@ private:      /// Retrieves the event type used for thread wakeup callbacks.      const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const; -    /// Provides a reference to the thread wakeup callback handle table. -    Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); +    /// Provides a reference to the global handle table. +    Kernel::HandleTable& GlobalHandleTable(); -    /// Provides a const reference to the thread wakeup callback handle table. -    const Kernel::HandleTable& ThreadWakeupCallbackHandleTable() const; +    /// Provides a const reference to the global handle table. 
+    const Kernel::HandleTable& GlobalHandleTable() const;      struct Impl;      std::unique_ptr<Impl> impl; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 86f1421bf..c65f82fb7 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -18,10 +18,11 @@  #include "core/hle/kernel/kernel.h"  #include "core/hle/kernel/process.h"  #include "core/hle/kernel/scheduler.h" +#include "core/hle/kernel/time_manager.h"  namespace Kernel { -GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} {} +GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}  GlobalScheduler::~GlobalScheduler() = default; @@ -35,7 +36,7 @@ void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {  }  void GlobalScheduler::UnloadThread(std::size_t core) { -    Scheduler& sched = system.Scheduler(core); +    Scheduler& sched = kernel.Scheduler(core);      sched.UnloadThread();  } @@ -50,7 +51,7 @@ void GlobalScheduler::SelectThread(std::size_t core) {          sched.is_context_switch_pending = sched.selected_thread != sched.current_thread;          std::atomic_thread_fence(std::memory_order_seq_cst);      }; -    Scheduler& sched = system.Scheduler(core); +    Scheduler& sched = kernel.Scheduler(core);      Thread* current_thread = nullptr;      // Step 1: Get top thread in schedule queue.      current_thread = scheduled_queue[core].empty() ? 
nullptr : scheduled_queue[core].front(); @@ -356,6 +357,32 @@ void GlobalScheduler::Shutdown() {      thread_list.clear();  } +void GlobalScheduler::Lock() { +    Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID(); +    if (current_thread == current_owner) { +        ++scope_lock; +    } else { +        inner_lock.lock(); +        current_owner = current_thread; +        ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle()); +        scope_lock = 1; +    } +} + +void GlobalScheduler::Unlock() { +    if (--scope_lock != 0) { +        ASSERT(scope_lock > 0); +        return; +    } +    for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { +        SelectThread(i); +    } +    current_owner = Core::EmuThreadHandle::InvalidHandle(); +    scope_lock = 1; +    inner_lock.unlock(); +    // TODO(Blinkhawk): Setup the interrupts and change context on current core. +} +  Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id)      : system(system), cpu_core(cpu_core), core_id(core_id) {} @@ -485,4 +512,27 @@ void Scheduler::Shutdown() {      selected_thread = nullptr;  } +SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} { +    kernel.GlobalScheduler().Lock(); +} + +SchedulerLock::~SchedulerLock() { +    kernel.GlobalScheduler().Unlock(); +} + +SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, +                                             Thread* time_task, s64 nanoseconds) +    : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{ +                                                                                   nanoseconds} { +    event_handle = InvalidHandle; +} + +SchedulerLockAndSleep::~SchedulerLockAndSleep() { +    if (sleep_cancelled) { +        return; +    } +    auto& time_manager = kernel.TimeManager(); +    time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds); +} +  } // namespace 
Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 96db049cb..1c93a838c 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -6,6 +6,7 @@  #include <atomic>  #include <memory> +#include <mutex>  #include <vector>  #include "common/common_types.h" @@ -20,11 +21,13 @@ class System;  namespace Kernel { +class KernelCore;  class Process; +class SchedulerLock;  class GlobalScheduler final {  public: -    explicit GlobalScheduler(Core::System& system); +    explicit GlobalScheduler(KernelCore& kernel);      ~GlobalScheduler();      /// Adds a new thread to the scheduler @@ -138,6 +141,14 @@ public:      void Shutdown();  private: +    friend class SchedulerLock; + +    /// Lock the scheduler to the current thread. +    void Lock(); + +    /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling +    /// and reschedules current core if needed. +    void Unlock();      /**       * Transfers a thread into an specific core. If the destination_core is -1       * it will be unscheduled from its source code and added into its suggested @@ -158,9 +169,14 @@ private:      // ordered from Core 0 to Core 3.      std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62}; +    /// Scheduler lock mechanisms. +    std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock +    std::atomic<s64> scope_lock{}; +    Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()}; +      /// Lists all thread ids that aren't deleted/etc.      
std::vector<std::shared_ptr<Thread>> thread_list; -    Core::System& system; +    KernelCore& kernel;  };  class Scheduler final { @@ -227,4 +243,30 @@ private:      bool is_context_switch_pending = false;  }; +class SchedulerLock { +public: +    explicit SchedulerLock(KernelCore& kernel); +    ~SchedulerLock(); + +protected: +    KernelCore& kernel; +}; + +class SchedulerLockAndSleep : public SchedulerLock { +public: +    explicit SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* time_task, +                                   s64 nanoseconds); +    ~SchedulerLockAndSleep(); + +    void CancelSleep() { +        sleep_cancelled = true; +    } + +private: +    Handle& event_handle; +    Thread* time_task; +    s64 nanoseconds; +    bool sleep_cancelled{}; +}; +  } // namespace Kernel diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ae5f2c8bd..bf850e0b2 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -46,9 +46,9 @@ Thread::~Thread() = default;  void Thread::Stop() {      // Cancel any outstanding wakeup events for this thread      Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), -                                                             callback_handle); -    kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); -    callback_handle = 0; +                                                             global_handle); +    kernel.GlobalHandleTable().Close(global_handle); +    global_handle = 0;      SetStatus(ThreadStatus::Dead);      Signal(); @@ -73,12 +73,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {      // thread-safe version of ScheduleEvent.      
const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});      Core::System::GetInstance().CoreTiming().ScheduleEvent( -        cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); +        cycles, kernel.ThreadWakeupCallbackEventType(), global_handle);  }  void Thread::CancelWakeupTimer() {      Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), -                                                             callback_handle); +                                                             global_handle);  }  void Thread::ResumeFromWait() { @@ -190,7 +190,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin      thread->condvar_wait_address = 0;      thread->wait_handle = 0;      thread->name = std::move(name); -    thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); +    thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();      thread->owner_process = &owner_process;      auto& scheduler = kernel.GlobalScheduler();      scheduler.AddThread(thread); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 7a4916318..129e7858a 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -453,6 +453,10 @@ public:          is_sync_cancelled = value;      } +    Handle GetGlobalHandle() const { +        return global_handle; +    } +  private:      void SetSchedulingStatus(ThreadSchedStatus new_status);      void SetCurrentPriority(u32 new_priority); @@ -514,7 +518,7 @@ private:      VAddr arb_wait_address{0};      /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. -    Handle callback_handle = 0; +    Handle global_handle = 0;      /// Callback that will be invoked when the thread is resumed from a waiting state. 
If the thread      /// was waiting via WaitSynchronization then the object will be the last object that became diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp new file mode 100644 index 000000000..21b290468 --- /dev/null +++ b/src/core/hle/kernel/time_manager.cpp @@ -0,0 +1,44 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/thread.h" +#include "core/hle/kernel/time_manager.h" + +namespace Kernel { + +TimeManager::TimeManager(Core::System& system) : system{system} { +    time_manager_event_type = Core::Timing::CreateEvent( +        "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) { +            Handle proper_handle = static_cast<Handle>(thread_handle); +            std::shared_ptr<Thread> thread = +                this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle); +            thread->ResumeFromWait(); +        }); +} + +void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) { +    if (nanoseconds > 0) { +        ASSERT(timetask); +        event_handle = timetask->GetGlobalHandle(); +        const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); +        system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle); +    } else { +        event_handle = InvalidHandle; +    } +} + +void TimeManager::UnscheduleTimeEvent(Handle event_handle) { +    if (event_handle == InvalidHandle) { +        return; +    } +    system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle); +} + +} // namespace Kernel diff --git a/src/core/hle/kernel/time_manager.h 
b/src/core/hle/kernel/time_manager.h new file mode 100644 index 000000000..eaec486d1 --- /dev/null +++ b/src/core/hle/kernel/time_manager.h @@ -0,0 +1,43 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +#include "core/hle/kernel/object.h" + +namespace Core { +class System; +} // namespace Core + +namespace Core::Timing { +struct EventType; +} // namespace Core::Timing + +namespace Kernel { + +class Thread; + +/** + * The `TimeManager` takes care of scheduling time events on threads and executes their TimeUp + * method when the event is triggered. + */ +class TimeManager { +public: +    explicit TimeManager(Core::System& system); + +    /// Schedule a time event on `timetask` thread that will expire in 'nanoseconds' +    /// returns a non-invalid handle in `event_handle` if correctly scheduled +    void ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds); + +    /// Unschedule an existing time event +    void UnscheduleTimeEvent(Handle event_handle); + +private: +    Core::System& system; +    std::shared_ptr<Core::Timing::EventType> time_manager_event_type; +}; + +} // namespace Kernel diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index 15c09f04c..c1e32b28c 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -287,13 +287,13 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {          analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(              Input::AnalogDirection::DOWN)); -    pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] -                                    ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); -    
pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] -                                      ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));      pad_state.r_stick_right.Assign(          analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] -            ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); +            ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT)); +    pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] +                                      ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT)); +    pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] +                                    ->GetAnalogDirectionStatus(Input::AnalogDirection::UP));      pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]                                        ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index e1a260762..6cabdaa3c 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp @@ -34,6 +34,20 @@ public:                                 y * coef * (x == 0 ? 
1.0f : SQRT_HALF));      } +    bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override { +        switch (direction) { +        case Input::AnalogDirection::RIGHT: +            return right->GetStatus(); +        case Input::AnalogDirection::LEFT: +            return left->GetStatus(); +        case Input::AnalogDirection::UP: +            return up->GetStatus(); +        case Input::AnalogDirection::DOWN: +            return down->GetStatus(); +        } +        return false; +    } +  private:      Button up;      Button down; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 26939be3f..6ea7cc6a5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -542,7 +542,7 @@ public:                  BitField<12, 1, InvMemoryLayout> type;              } memory_layout;              union { -                BitField<0, 16, u32> array_mode; +                BitField<0, 16, u32> layers;                  BitField<16, 1, u32> volume;              };              u32 layer_stride; @@ -800,8 +800,12 @@ public:                  u32 zeta_width;                  u32 zeta_height; +                union { +                    BitField<0, 16, u32> zeta_layers; +                    BitField<16, 1, u32> zeta_volume; +                }; -                INSERT_UNION_PADDING_WORDS(0x27); +                INSERT_UNION_PADDING_WORDS(0x26);                  u32 depth_test_enable; @@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458);  ASSERT_REG_POSITION(rt_control, 0x487);  ASSERT_REG_POSITION(zeta_width, 0x48a);  ASSERT_REG_POSITION(zeta_height, 0x48b); +ASSERT_REG_POSITION(zeta_layers, 0x48c);  ASSERT_REG_POSITION(depth_test_enable, 0x4B3);  ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);  ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7d7137109..e8f763ce9 100644 --- a/src/video_core/gpu.cpp 
+++ b/src/video_core/gpu.cpp @@ -140,71 +140,6 @@ void GPU::FlushCommands() {      renderer.Rasterizer().FlushCommands();  } -u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { -    ASSERT(format != RenderTargetFormat::NONE); - -    switch (format) { -    case RenderTargetFormat::RGBA32_FLOAT: -    case RenderTargetFormat::RGBA32_UINT: -        return 16; -    case RenderTargetFormat::RGBA16_UINT: -    case RenderTargetFormat::RGBA16_UNORM: -    case RenderTargetFormat::RGBA16_FLOAT: -    case RenderTargetFormat::RGBX16_FLOAT: -    case RenderTargetFormat::RG32_FLOAT: -    case RenderTargetFormat::RG32_UINT: -        return 8; -    case RenderTargetFormat::RGBA8_UNORM: -    case RenderTargetFormat::RGBA8_SNORM: -    case RenderTargetFormat::RGBA8_SRGB: -    case RenderTargetFormat::RGBA8_UINT: -    case RenderTargetFormat::RGB10_A2_UNORM: -    case RenderTargetFormat::BGRA8_UNORM: -    case RenderTargetFormat::BGRA8_SRGB: -    case RenderTargetFormat::RG16_UNORM: -    case RenderTargetFormat::RG16_SNORM: -    case RenderTargetFormat::RG16_UINT: -    case RenderTargetFormat::RG16_SINT: -    case RenderTargetFormat::RG16_FLOAT: -    case RenderTargetFormat::R32_FLOAT: -    case RenderTargetFormat::R11G11B10_FLOAT: -    case RenderTargetFormat::R32_UINT: -        return 4; -    case RenderTargetFormat::R16_UNORM: -    case RenderTargetFormat::R16_SNORM: -    case RenderTargetFormat::R16_UINT: -    case RenderTargetFormat::R16_SINT: -    case RenderTargetFormat::R16_FLOAT: -    case RenderTargetFormat::RG8_UNORM: -    case RenderTargetFormat::RG8_SNORM: -        return 2; -    case RenderTargetFormat::R8_UNORM: -    case RenderTargetFormat::R8_UINT: -        return 1; -    default: -        UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); -        return 1; -    } -} - -u32 DepthFormatBytesPerPixel(DepthFormat format) { -    switch (format) { -    case DepthFormat::Z32_S8_X24_FLOAT: -        return 8; -    case 
DepthFormat::Z32_FLOAT: -    case DepthFormat::S8_Z24_UNORM: -    case DepthFormat::Z24_X8_UNORM: -    case DepthFormat::Z24_S8_UNORM: -    case DepthFormat::Z24_C8_UNORM: -        return 4; -    case DepthFormat::Z16_UNORM: -        return 2; -    default: -        UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); -        return 1; -    } -} -  // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence  // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.  // So the values you see in docs might be multiplied by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 07727210c..ba8c9d665 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 {      RG16_UINT = 0xDD,      RG16_FLOAT = 0xDE,      R11G11B10_FLOAT = 0xE0, +    R32_SINT = 0xE3,      R32_UINT = 0xE4,      R32_FLOAT = 0xE5,      B5G6R5_UNORM = 0xE8, @@ -82,12 +83,6 @@ enum class DepthFormat : u32 {      Z32_S8_X24_FLOAT = 0x19,  }; -/// Returns the number of bytes per pixel of each rendertarget format. -u32 RenderTargetBytesPerPixel(RenderTargetFormat format); - -/// Returns the number of bytes per pixel of each depth format. 
-u32 DepthFormatBytesPerPixel(DepthFormat format); -  struct CommandListHeader;  class DebugContext; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 2f2fe6859..f2c83266e 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = {      MortonCopy<true, PixelFormat::RG32UI>,      MortonCopy<true, PixelFormat::RGBX16F>,      MortonCopy<true, PixelFormat::R32UI>, +    MortonCopy<true, PixelFormat::R32I>,      MortonCopy<true, PixelFormat::ASTC_2D_8X8>,      MortonCopy<true, PixelFormat::ASTC_2D_8X5>,      MortonCopy<true, PixelFormat::ASTC_2D_5X4>, @@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = {      MortonCopy<false, PixelFormat::RG32UI>,      MortonCopy<false, PixelFormat::RGBX16F>,      MortonCopy<false, PixelFormat::R32UI>, +    MortonCopy<false, PixelFormat::R32I>,      nullptr,      nullptr,      nullptr, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d4b81cd87..cf934b0d8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format      {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI      {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F      {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI +    {GL_R32I, GL_RED_INTEGER, GL_INT, false},                                       // R32I      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5      {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // 
ASTC_2D_5X4 @@ -260,6 +261,13 @@ CachedSurface::~CachedSurface() = default;  void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {      MICROPROFILE_SCOPE(OpenGL_Texture_Download); +    if (params.IsBuffer()) { +        glGetNamedBufferSubData(texture_buffer.handle, 0, +                                static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), +                                staging_buffer.data()); +        return; +    } +      SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });      for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -398,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p  CachedSurfaceView::~CachedSurfaceView() = default;  void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { -    ASSERT(params.num_layers == 1 && params.num_levels == 1); +    ASSERT(params.num_levels == 1); -    const auto& owner_params = surface.GetSurfaceParams(); +    const GLuint texture = surface.GetTexture(); +    if (params.num_layers > 1) { +        // Layered framebuffer attachments +        UNIMPLEMENTED_IF(params.base_layer != 0); + +        switch (params.target) { +        case SurfaceTarget::Texture2DArray: +            glFramebufferTexture(target, attachment, texture, params.base_level); +            break; +        default: +            UNIMPLEMENTED(); +        } +        return; +    } -    switch (owner_params.target) { +    const GLenum view_target = surface.GetTarget(); +    switch (surface.GetSurfaceParams().target) {      case SurfaceTarget::Texture1D: -        glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), -                               params.base_level); +        glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);          break;      case SurfaceTarget::Texture2D: -        glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), -                
               params.base_level); +        glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);          break;      case SurfaceTarget::Texture1DArray:      case SurfaceTarget::Texture2DArray:      case SurfaceTarget::TextureCubemap:      case SurfaceTarget::TextureCubeArray: -        glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, +        glFramebufferTextureLayer(target, attachment, texture, params.base_level,                                    params.base_layer);          break;      default: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 331808113..ef66dd141 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -159,12 +159,13 @@ struct FormatTuple {      {vk::Format::eR32G32Uint, Attachable | Storage},             // RG32UI      {vk::Format::eUndefined, {}},                                // RGBX16F      {vk::Format::eR32Uint, Attachable | Storage},                // R32UI +    {vk::Format::eR32Sint, Attachable | Storage},                // R32I      {vk::Format::eAstc8x8UnormBlock, {}},                        // ASTC_2D_8X8      {vk::Format::eUndefined, {}},                                // ASTC_2D_8X5      {vk::Format::eUndefined, {}},                                // ASTC_2D_5X4      {vk::Format::eUndefined, {}},                                // BGRA8_SRGB      {vk::Format::eBc1RgbaSrgbBlock, {}},                         // DXT1_SRGB -    {vk::Format::eUndefined, {}},                                // DXT23_SRGB +    {vk::Format::eBc2SrgbBlock, {}},                             // DXT23_SRGB      {vk::Format::eBc3SrgbBlock, {}},                             // DXT45_SRGB      {vk::Format::eBc7SrgbBlock, {}},                             // BC7U_SRGB      {vk::Format::eR4G4B4A4UnormPack16, Attachable},              // R4G4B4A4U @@ -363,6 +364,8 @@ 
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr              return vk::Format::eR8G8B8A8Uint;          case Maxwell::VertexAttribute::Size::Size_32:              return vk::Format::eR32Uint; +        case Maxwell::VertexAttribute::Size::Size_32_32_32_32: +            return vk::Format::eR32G32B32A32Uint;          default:              break;          } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 588a6835f..886bde3b9 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -107,6 +107,8 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan      features.occlusionQueryPrecise = true;      features.fragmentStoresAndAtomics = true;      features.shaderImageGatherExtended = true; +    features.shaderStorageImageReadWithoutFormat = +        is_shader_storage_img_read_without_format_supported;      features.shaderStorageImageWriteWithoutFormat = true;      features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -465,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK  void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {      const auto supported_features{physical.getFeatures(dldi)}; +    is_shader_storage_img_read_without_format_supported = +        supported_features.shaderStorageImageReadWithoutFormat;      is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);  } @@ -519,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti                                          vk::Format::eB10G11R11UfloatPack32,                                          vk::Format::eR32Sfloat,                                          vk::Format::eR32Uint, +                                        vk::Format::eR32Sint,                                          vk::Format::eR16Sfloat,        
                                  vk::Format::eR16G16B16A16Sfloat,                                          vk::Format::eB8G8R8A8Unorm, @@ -538,6 +543,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti                                          vk::Format::eBc6HUfloatBlock,                                          vk::Format::eBc6HSfloatBlock,                                          vk::Format::eBc1RgbaSrgbBlock, +                                        vk::Format::eBc2SrgbBlock,                                          vk::Format::eBc3SrgbBlock,                                          vk::Format::eBc7SrgbBlock,                                          vk::Format::eAstc4x4SrgbBlock, diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 72603f9f6..2c27ad730 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public:          return properties.limits.maxPushConstantsSize;      } +    /// Returns true if Shader storage Image Read Without Format supported. +    bool IsShaderStorageImageReadWithoutFormatSupported() const { +        return is_shader_storage_img_read_without_format_supported; +    } +      /// Returns true if ASTC is natively supported.      bool IsOptimalAstcSupported() const {          return is_optimal_astc_supported; @@ -227,6 +232,8 @@ private:      bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.      bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.      bool nv_device_diagnostic_checkpoints{};   ///< Support for VK_NV_device_diagnostic_checkpoints. 
+    bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage +                                                                ///< image read without format      // Telemetry parameters      std::string vendor_name;                      ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 31c078f6a..3bf86da87 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -611,33 +611,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen  std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(      vk::RenderPass renderpass) {      FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), -                            std::numeric_limits<u32>::max()}; +                            std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; -    const auto MarkAsModifiedAndPush = [&](const View& view) { -        if (view == nullptr) { +    const auto try_push = [&](const View& view) { +        if (!view) {              return false;          }          key.views.push_back(view->GetHandle());          key.width = std::min(key.width, view->GetWidth());          key.height = std::min(key.height, view->GetHeight()); +        key.layers = std::min(key.layers, view->GetNumLayers());          return true;      };      for (std::size_t index = 0; index < std::size(color_attachments); ++index) { -        if (MarkAsModifiedAndPush(color_attachments[index])) { +        if (try_push(color_attachments[index])) {              texture_cache.MarkColorBufferInUse(index);          }      } -    if (MarkAsModifiedAndPush(zeta_attachment)) { +    if (try_push(zeta_attachment)) {          texture_cache.MarkDepthBufferInUse();      }      const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);      auto& 
framebuffer = fbentry->second;      if (is_cache_miss) { -        const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, -                                                       static_cast<u32>(key.views.size()), -                                                       key.views.data(), key.width, key.height, 1); +        const vk::FramebufferCreateInfo framebuffer_ci( +            {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, +            key.height, key.layers);          const auto dev = device.GetLogical();          const auto& dld = device.GetDispatchLoader();          framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 138903d60..4dc8af6e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -56,6 +56,7 @@ struct FramebufferCacheKey {      vk::RenderPass renderpass{};      u32 width = 0;      u32 height = 0; +    u32 layers = 0;      ImageViewsPack views;      std::size_t Hash() const noexcept { @@ -66,12 +67,17 @@ struct FramebufferCacheKey {          }          boost::hash_combine(hash, width);          boost::hash_combine(hash, height); +        boost::hash_combine(hash, layers);          return hash;      }      bool operator==(const FramebufferCacheKey& rhs) const noexcept { -        return std::tie(renderpass, views, width, height) == -               std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); +        return std::tie(renderpass, views, width, height, layers) == +               std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); +    } + +    bool operator!=(const FramebufferCacheKey& rhs) const noexcept { +        return !operator==(rhs);      }  }; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 
0a8ec8398..204b7c39c 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4>      } else if (color == std::array<float, 4>{1, 1, 1, 1}) {          return vk::BorderColor::eFloatOpaqueWhite;      } else { -        return {}; +        if (color[0] + color[1] + color[2] > 1.35f) { +            // If color elements are brighter than roughly 0.5 average, use white border +            return vk::BorderColor::eFloatOpaqueWhite; +        } +        if (color[3] > 0.5f) { +            return vk::BorderColor::eFloatOpaqueBlack; +        } +        return vk::BorderColor::eFloatTransparentBlack;      }  } @@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)      const auto border_color{tsc.GetBorderColor()};      const auto vk_border_color{TryConvertBorderColor(border_color)}; -    UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", -                         border_color[0], border_color[1], border_color[2], border_color[3]);      constexpr bool unnormalized_coords{false}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f64f5da28..2da622d15 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -86,6 +86,7 @@ struct AttributeType {  struct VertexIndices {      std::optional<u32> position; +    std::optional<u32> layer;      std::optional<u32> viewport;      std::optional<u32> point_size;      std::optional<u32> clip_distances; @@ -284,14 +285,20 @@ public:          AddExtension("SPV_KHR_variable_pointers");          AddExtension("SPV_KHR_shader_draw_parameters"); -        if (ir.UsesViewportIndex()) { -            AddCapability(spv::Capability::MultiViewport); -            if 
(device.IsExtShaderViewportIndexLayerSupported()) { +        if (ir.UsesLayer() || ir.UsesViewportIndex()) { +            if (ir.UsesViewportIndex()) { +                AddCapability(spv::Capability::MultiViewport); +            } +            if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {                  AddExtension("SPV_EXT_shader_viewport_index_layer");                  AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);              }          } +        if (device.IsShaderStorageImageReadWithoutFormatSupported()) { +            AddCapability(spv::Capability::StorageImageReadWithoutFormat); +        } +          if (device.IsFloat16Supported()) {              AddCapability(spv::Capability::Float16);          } @@ -924,13 +931,22 @@ private:          VertexIndices indices;          indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); +        if (ir.UsesLayer()) { +            if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { +                indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); +            } else { +                LOG_ERROR( +                    Render_Vulkan, +                    "Shader requires Layer but it's not supported on this stage with this device."); +            } +        } +          if (ir.UsesViewportIndex()) {              if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {                  indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");              } else { -                LOG_ERROR(Render_Vulkan, -                          "Shader requires ViewportIndex but it's not supported on this " -                          "stage with this device."); +                LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " +                                         "this stage with this device.");              }       
   } @@ -1292,6 +1308,13 @@ private:                  }                  case Attribute::Index::LayerViewportPointSize:                      switch (element) { +                    case 1: { +                        if (!out_indices.layer) { +                            return {}; +                        } +                        const u32 index = out_indices.layer.value(); +                        return {AccessElement(t_out_int, out_vertex, index), Type::Int}; +                    }                      case 2: {                          if (!out_indices.viewport) {                              return {}; @@ -1362,6 +1385,11 @@ private:              UNIMPLEMENTED();          } +        if (!target.id) { +            // On failure we return a nullptr target.id, skip these stores. +            return {}; +        } +          OpStore(target.id, As(Visit(src), target.type));          return {};      } @@ -1755,8 +1783,16 @@ private:      }      Expression ImageLoad(Operation operation) { -        UNIMPLEMENTED(); -        return {}; +        if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { +            return {v_float_zero, Type::Float}; +        } + +        const auto& meta{std::get<MetaImage>(operation.GetMeta())}; + +        const Id coords = GetCoordinates(operation, Type::Int); +        const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); + +        return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};      }      Expression ImageStore(Operation operation) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d3edbe80c..22e3d34de 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -151,6 +151,10 @@ public:          return params.GetMipHeight(base_level);      } +    u32 GetNumLayers() const { +        return num_layers; +    } +      bool IsBufferView() const {          return 
buffer_view;      } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 542636430..bee7d8cad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -527,7 +527,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,      const bool is_bindless = bindless_reg.has_value();      UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow); -    ASSERT_MSG(texture_type != TextureType::Texture3D || is_array || is_shadow, +    ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,                 "Illegal texture type");      const SamplerInfo info{texture_type, is_array, is_shadow, false}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1655ccf16..9707c353d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)          return PixelFormat::R16I;      case Tegra::RenderTargetFormat::R32_FLOAT:          return PixelFormat::R32F; +    case Tegra::RenderTargetFormat::R32_SINT: +        return PixelFormat::R32I;      case Tegra::RenderTargetFormat::R32_UINT:          return PixelFormat::R32UI;      case Tegra::RenderTargetFormat::RG32_UINT: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0d17a93ed..d88109e5a 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -59,47 +59,48 @@ enum class PixelFormat {      RG32UI = 41,      RGBX16F = 42,      R32UI = 43, -    ASTC_2D_8X8 = 44, -    ASTC_2D_8X5 = 45, -    ASTC_2D_5X4 = 46, -    BGRA8_SRGB = 47, -    DXT1_SRGB = 48, -    DXT23_SRGB = 49, -    DXT45_SRGB = 50, -    BC7U_SRGB = 51, -    R4G4B4A4U = 52, -    ASTC_2D_4X4_SRGB = 53, -    ASTC_2D_8X8_SRGB = 54, -    ASTC_2D_8X5_SRGB = 55, -    ASTC_2D_5X4_SRGB = 56, -    ASTC_2D_5X5 = 57, -    ASTC_2D_5X5_SRGB = 58, -    
ASTC_2D_10X8 = 59, -    ASTC_2D_10X8_SRGB = 60, -    ASTC_2D_6X6 = 61, -    ASTC_2D_6X6_SRGB = 62, -    ASTC_2D_10X10 = 63, -    ASTC_2D_10X10_SRGB = 64, -    ASTC_2D_12X12 = 65, -    ASTC_2D_12X12_SRGB = 66, -    ASTC_2D_8X6 = 67, -    ASTC_2D_8X6_SRGB = 68, -    ASTC_2D_6X5 = 69, -    ASTC_2D_6X5_SRGB = 70, -    E5B9G9R9F = 71, +    R32I = 44, +    ASTC_2D_8X8 = 45, +    ASTC_2D_8X5 = 46, +    ASTC_2D_5X4 = 47, +    BGRA8_SRGB = 48, +    DXT1_SRGB = 49, +    DXT23_SRGB = 50, +    DXT45_SRGB = 51, +    BC7U_SRGB = 52, +    R4G4B4A4U = 53, +    ASTC_2D_4X4_SRGB = 54, +    ASTC_2D_8X8_SRGB = 55, +    ASTC_2D_8X5_SRGB = 56, +    ASTC_2D_5X4_SRGB = 57, +    ASTC_2D_5X5 = 58, +    ASTC_2D_5X5_SRGB = 59, +    ASTC_2D_10X8 = 60, +    ASTC_2D_10X8_SRGB = 61, +    ASTC_2D_6X6 = 62, +    ASTC_2D_6X6_SRGB = 63, +    ASTC_2D_10X10 = 64, +    ASTC_2D_10X10_SRGB = 65, +    ASTC_2D_12X12 = 66, +    ASTC_2D_12X12_SRGB = 67, +    ASTC_2D_8X6 = 68, +    ASTC_2D_8X6_SRGB = 69, +    ASTC_2D_6X5 = 70, +    ASTC_2D_6X5_SRGB = 71, +    E5B9G9R9F = 72,      MaxColorFormat,      // Depth formats -    Z32F = 72, -    Z16 = 73, +    Z32F = 73, +    Z16 = 74,      MaxDepthFormat,      // DepthStencil formats -    Z24S8 = 74, -    S8Z24 = 75, -    Z32FS8 = 76, +    Z24S8 = 75, +    S8Z24 = 76, +    Z32FS8 = 77,      MaxDepthStencilFormat, @@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{      0, // RG32UI      0, // RGBX16F      0, // R32UI +    0, // R32I      2, // ASTC_2D_8X8      2, // ASTC_2D_8X5      2, // ASTC_2D_5X4 @@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{      1,  // RG32UI      1,  // RGBX16F      1,  // R32UI +    1,  // R32I      8,  // ASTC_2D_8X8      8,  // ASTC_2D_8X5      5,  // ASTC_2D_5X4 @@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{      1,  // RG32UI      1,  // RGBX16F      1,  // R32UI +    1,  // R32I      8,  // ASTC_2D_8X8      5,  // 
ASTC_2D_8X5      4,  // ASTC_2D_5X4 @@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{      64,  // RG32UI      64,  // RGBX16F      32,  // R32UI +    32,  // R32I      128, // ASTC_2D_8X8      128, // ASTC_2D_8X5      128, // ASTC_2D_5X4 @@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table      SurfaceCompression::None,       // RG32UI      SurfaceCompression::None,       // RGBX16F      SurfaceCompression::None,       // R32UI +    SurfaceCompression::None,       // R32I      SurfaceCompression::Converted,  // ASTC_2D_8X8      SurfaceCompression::Converted,  // ASTC_2D_8X5      SurfaceCompression::Converted,  // ASTC_2D_5X4 diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 81fb9f633..cc3ad8417 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table {      ComponentType alpha_component;      bool is_srgb;  }; -constexpr std::array<Table, 74> DefinitionTable = {{ +constexpr std::array<Table, 75> DefinitionTable = {{      {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},      {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},      {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{      {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},      {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, +    {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},      {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 84469b7ba..002df414f 100644 --- 
a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -277,6 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,              SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,                          staging_buffer.data() + host_offset, level);          } +    } else if (params.IsBuffer()) { +        // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest +        // memory. +        std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);      } else {          ASSERT(params.target == SurfaceTarget::Texture2D);          ASSERT(params.num_levels == 1); diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 38b3a4ba8..f00839313 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta      if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {          switch (params.pixel_format) {          case PixelFormat::R16U: -        case PixelFormat::R16F: { +        case PixelFormat::R16F:              params.pixel_format = PixelFormat::Z16;              break; -        } -        case PixelFormat::R32F: { +        case PixelFormat::R32F:              params.pixel_format = PixelFormat::Z32F;              break; -        } -        default: { +        default:              UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",                                static_cast<u32>(params.pixel_format));          } -        }          params.type = GetFormatType(params.pixel_format);      }      params.type = GetFormatType(params.pixel_format); @@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl      return params;  } -SurfaceParams 
SurfaceParams::CreateForDepthBuffer( -    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, -    u32 block_width, u32 block_height, u32 block_depth, -    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { +SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { +    const auto& regs = system.GPU().Maxwell3D().regs; +    regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type;      SurfaceParams params; -    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; +    params.is_tiled = regs.zeta.memory_layout.type == +                      Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;      params.srgb_conversion = false; -    params.block_width = std::min(block_width, 5U); -    params.block_height = std::min(block_height, 5U); -    params.block_depth = std::min(block_depth, 5U); +    params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U); +    params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U); +    params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);      params.tile_width_spacing = 1; -    params.pixel_format = PixelFormatFromDepthFormat(format); +    params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);      params.type = GetFormatType(params.pixel_format); -    params.width = zeta_width; -    params.height = zeta_height; -    params.target = SurfaceTarget::Texture2D; -    params.depth = 1; +    params.width = regs.zeta_width; +    params.height = regs.zeta_height;      params.pitch = 0;      params.num_levels = 1;      params.emulated_levels = 1; -    params.is_layered = false; + +    const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0; +    params.is_layered = is_layered; +    params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; +    params.depth = is_layered ? 
regs.zeta_layers.Value() : 1U;      return params;  } @@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz          params.width = params.pitch / bpp;      }      params.height = config.height; -    params.depth = 1; -    params.target = SurfaceTarget::Texture2D;      params.num_levels = 1;      params.emulated_levels = 1; -    params.is_layered = false; + +    const bool is_layered = config.layers > 1 && params.block_depth == 0; +    params.is_layered = is_layered; +    params.depth = is_layered ? config.layers.Value() : 1; +    params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;      return params;  } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 9256fd6d9..995cc3818 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -35,10 +35,7 @@ public:                                          const VideoCommon::Shader::Image& entry);      /// Creates SurfaceCachedParams for a depth buffer configuration. -    static SurfaceParams CreateForDepthBuffer( -        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, -        u32 block_width, u32 block_height, u32 block_depth, -        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); +    static SurfaceParams CreateForDepthBuffer(Core::System& system);      /// Creates SurfaceCachedParams from a framebuffer configuration.      
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f4c015635..c70e4aec2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -160,10 +160,7 @@ public:              SetEmptyDepthBuffer();              return {};          } -        const auto depth_params{SurfaceParams::CreateForDepthBuffer( -            system, regs.zeta_width, regs.zeta_height, regs.zeta.format, -            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, -            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; +        const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};          auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);          if (depth_buffer.target)              depth_buffer.target->MarkAsRenderTarget(false, NO_RT); @@ -721,7 +718,6 @@ private:      std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,                                            const SurfaceParams& params, bool preserve_contents,                                            bool is_render) { -          // Step 1          // Check Level 1 Cache for a fast structural match. If candidate surface          // matches at certain level we are pretty much done. 
@@ -733,14 +729,18 @@ private:                  return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,                                        topological_result);              } +              const auto struct_result = current_surface->MatchesStructure(params); -            if (struct_result != MatchStructureResult::None && -                (params.target != SurfaceTarget::Texture3D || -                 current_surface->MatchTarget(params.target))) { -                if (struct_result == MatchStructureResult::FullMatch) { -                    return ManageStructuralMatch(current_surface, params, is_render); -                } else { -                    return RebuildSurface(current_surface, params, is_render); +            if (struct_result != MatchStructureResult::None) { +                const auto& old_params = current_surface->GetSurfaceParams(); +                const bool not_3d = params.target != SurfaceTarget::Texture3D && +                                    old_params.target != SurfaceTarget::Texture3D; +                if (not_3d || current_surface->MatchTarget(params.target)) { +                    if (struct_result == MatchStructureResult::FullMatch) { +                        return ManageStructuralMatch(current_surface, params, is_render); +                    } else { +                        return RebuildSurface(current_surface, params, is_render); +                    }                  }              }          } | 
