diff options
Diffstat (limited to 'src')
67 files changed, 1181 insertions, 340 deletions
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 201ec7a3c..b2e5d336c 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -46,16 +46,18 @@ struct AudioRendererParameter { u32_le sample_rate; u32_le sample_count; u32_le mix_buffer_count; - u32_le unknown_c; + u32_le submix_count; u32_le voice_count; u32_le sink_count; u32_le effect_count; - u32_le unknown_1c; - u8 unknown_20; - INSERT_PADDING_BYTES(3); + u32_le performance_frame_count; + u8 is_voice_drop_enabled; + u8 unknown_21; + u8 unknown_22; + u8 execution_mode; u32_le splitter_count; - u32_le unknown_2c; - INSERT_PADDING_WORDS(1); + u32_le num_splitter_send_channels; + u32_le unknown_30; u32_le revision; }; static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp index 454de798b..c5a0d98ce 100644 --- a/src/audio_core/codec.cpp +++ b/src/audio_core/codec.cpp @@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM } } - state.yn1 = yn1; - state.yn2 = yn2; + state.yn1 = static_cast<s16>(yn1); + state.yn2 = static_cast<s16>(yn2); return ret; } diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 097328901..1da0b9f2a 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -12,6 +12,10 @@ #include "common/ring_buffer.h" #include "core/settings.h" +#ifdef _MSC_VER +#include <objbase.h> +#endif + namespace AudioCore { class CubebSinkStream final : public SinkStream { @@ -46,7 +50,7 @@ public: } } - ~CubebSinkStream() { + ~CubebSinkStream() override { if (!ctx) { return; } @@ -75,11 +79,11 @@ public: queue.Push(samples); } - std::size_t SamplesInQueue(u32 num_channels) const override { + std::size_t SamplesInQueue(u32 channel_count) const override { if (!ctx) return 0; - return queue.Size() / num_channels; + return queue.Size() / channel_count; } void Flush() override { @@ -98,7 +102,7 @@ private: u32 num_channels{}; Common::RingBuffer<s16, 0x10000> queue; - std::array<s16, 2> last_frame; + std::array<s16, 2> last_frame{}; std::atomic<bool> should_flush{}; TimeStretcher time_stretch; @@ -108,6 +112,11 @@ private: }; CubebSink::CubebSink(std::string_view target_device_name) { + // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows +#ifdef _MSC_VER + com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); +#endif + if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); return; @@ -142,6 +151,12 @@ CubebSink::~CubebSink() { } cubeb_destroy(ctx); + +#ifdef _MSC_VER + if (SUCCEEDED(com_init_result)) { + CoUninitialize(); + } +#endif } SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h index efb9d1634..511df7bb1 100644 --- a/src/audio_core/cubeb_sink.h +++ b/src/audio_core/cubeb_sink.h @@ -25,6 +25,10 @@ private: cubeb* ctx{}; cubeb_devid output_device{}; std::vector<SinkStreamPtr> sink_streams; + +#ifdef _MSC_VER + u32 com_init_result = 0; +#endif }; std::vector<std::string> ListCubebSinkDevices(); diff --git a/src/common/color.h b/src/common/color.h index 0379040be..3a2222077 100644 --- a/src/common/color.h +++ b/src/common/color.h @@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) { /** * Decode a color stored in RGBA8 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) { return {bytes[3], bytes[2], bytes[1], bytes[0]}; } /** * Decode a color stored in RGB8 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) { return {bytes[2], bytes[1], bytes[0], 255}; } /** * Decode a color stored in RG8 (aka HILO8) format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { +inline Common::Vec4<u8> DecodeRG8(const u8* bytes) { return {bytes[1], bytes[0], 0, 255}; } /** * Decode a color stored in RGB565 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), @@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { /** * Decode a color stored in RGB5A1 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), @@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { /** * Decode a color stored in RGBA4 format * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> + * @return Result color decoded as Common::Vec4<u8> */ -inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { +inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) { u16_le pixel; std::memcpy(&pixel, bytes, sizeof(pixel)); return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), @@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) { /** * Decode a depth value and a stencil value stored in D24S8 format * @param bytes Pointer to encoded source values - * @return Resulting values stored as a Math::Vec2 + * @return Resulting values stored as a Common::Vec2 */ -inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { +inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) { return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; } @@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) { bytes[3] = color.r(); bytes[2] = color.g(); bytes[1] = color.b(); @@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) { bytes[2] = color.r(); bytes[1] = color.g(); bytes[0] = color.b(); @@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) { bytes[1] = color.r(); bytes[0] = color.g(); } @@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) { const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); @@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) { const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); @@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { * @param color Source color to encode * @param bytes Destination pointer to store encoded color */ -inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { +inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) { const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); diff --git a/src/common/math_util.h b/src/common/math_util.h index 94b4394c5..cff3d48c5 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -7,7 +7,7 @@ #include <cstdlib> #include <type_traits> -namespace MathUtil { +namespace Common { constexpr float PI = 3.14159265f; @@ -41,4 +41,4 @@ struct Rectangle { } }; -} // namespace MathUtil +} // namespace Common diff --git a/src/common/quaternion.h b/src/common/quaternion.h index c528c0b68..370198ae0 100644 --- a/src/common/quaternion.h +++ b/src/common/quaternion.h @@ -6,12 +6,12 @@ #include "common/vector_math.h" -namespace Math { +namespace Common { template <typename T> class Quaternion { public: - Math::Vec3<T> xyz; + Vec3<T> xyz; T w{}; Quaternion<decltype(-T{})> Inverse() const { @@ -38,12 +38,12 @@ public: }; template <typename T> -auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { +auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) { return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); } -inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { +inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) { return {axis * std::sin(angle / 2), std::cos(angle / 2)}; } -} // namespace Math +} // namespace Common diff --git a/src/common/swap.h b/src/common/swap.h index 32af0b6ac..0e219747f 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -28,8 +28,8 @@ #include <cstring> #include "common/common_types.h" -// GCC 4.6+ -#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +// GCC +#ifdef __GNUC__ #if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) #define COMMON_LITTLE_ENDIAN 1 @@ -38,7 +38,7 @@ #endif // LLVM/clang -#elif __clang__ +#elif defined(__clang__) #if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) #define COMMON_LITTLE_ENDIAN 1 diff --git a/src/common/vector_math.h b/src/common/vector_math.h index 8feb49941..429485329 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h @@ -33,7 +33,7 @@ #include <cmath> #include <type_traits> -namespace Math { +namespace Common { template <typename T> class Vec2; @@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) { return MakeVec(x, yzw[0], yzw[1], yzw[2]); } -} // namespace Math +} // namespace Common diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp index 515626658..75fc04302 100644 --- a/src/core/file_sys/vfs_vector.cpp +++ b/src/core/file_sys/vfs_vector.cpp @@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_ if (offset + length > data.size()) data.resize(offset + length); const auto write = std::min(length, data.size() - offset); - std::memcpy(data.data(), data_, write); + std::memcpy(data.data() + offset, data_, write); return write; } diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 9dd493efb..e29afd630 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); } -std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { +std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const { new_x = std::max(new_x, framebuffer_layout.screen.left); new_x = std::min(new_x, framebuffer_layout.screen.right - 1); diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 7006a37b3..d0bcb4660 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -166,7 +166,7 @@ private: /** * Clip the provided coordinates to be inside the touchscreen area. */ - std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); + std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const; }; } // namespace Core::Frontend diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp index f8662d193..a1357179f 100644 --- a/src/core/frontend/framebuffer_layout.cpp +++ b/src/core/frontend/framebuffer_layout.cpp @@ -12,12 +12,12 @@ namespace Layout { // Finds the largest size subrectangle contained in window area that is confined to the aspect ratio template <class T> -static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, - float screen_aspect_ratio) { +static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area, + float screen_aspect_ratio) { float scale = std::min(static_cast<float>(window_area.GetWidth()), window_area.GetHeight() / screen_aspect_ratio); - return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), - static_cast<T>(std::round(scale * screen_aspect_ratio))}; + return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), + static_cast<T>(std::round(scale * screen_aspect_ratio))}; } FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { @@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / ScreenUndocked::Width}; - MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; - MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); + Common::Rectangle<unsigned> screen_window_area{0, 0, width, height}; + Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio); float window_aspect_ratio = static_cast<float>(height) / width; diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h index e06647794..c2c63d08c 100644 --- a/src/core/frontend/framebuffer_layout.h +++ b/src/core/frontend/framebuffer_layout.h @@ -16,7 +16,7 @@ struct FramebufferLayout { unsigned width{ScreenUndocked::Width}; unsigned height{ScreenUndocked::Height}; - MathUtil::Rectangle<unsigned> screen; + Common::Rectangle<unsigned> screen; /** * Returns the ration of pixel size of the screen, compared to the native size of the undocked diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h index 16fdcd376..7c11d7546 100644 --- a/src/core/frontend/input.h +++ b/src/core/frontend/input.h @@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>; * Orientation is determined by right-hand rule. * Units: deg/sec */ -using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; +using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>; /** * A touch device is an input device that returns a tuple of two floats and a bool. The floats are diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h index d17eb0cb6..8097b3863 100644 --- a/src/core/hle/kernel/errors.h +++ b/src/core/hle/kernel/errors.h @@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7}; constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; +constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104}; constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp index c8acde5b1..bdfaa977f 100644 --- a/src/core/hle/kernel/handle_table.cpp +++ b/src/core/hle/kernel/handle_table.cpp @@ -14,32 +14,47 @@ namespace Kernel { namespace { constexpr u16 GetSlot(Handle handle) { - return handle >> 15; + return static_cast<u16>(handle >> 15); } constexpr u16 GetGeneration(Handle handle) { - return handle & 0x7FFF; + return static_cast<u16>(handle & 0x7FFF); } } // Anonymous namespace HandleTable::HandleTable() { - next_generation = 1; Clear(); } HandleTable::~HandleTable() = default; +ResultCode HandleTable::SetSize(s32 handle_table_size) { + if (static_cast<u32>(handle_table_size) > MAX_COUNT) { + return ERR_OUT_OF_MEMORY; + } + + // Values less than or equal to zero indicate to use the maximum allowable + // size for the handle table in the actual kernel, so we ignore the given + // value in that case, since we assume this by default unless this function + // is called. + if (handle_table_size > 0) { + table_size = static_cast<u16>(handle_table_size); + } + + return RESULT_SUCCESS; +} + ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { DEBUG_ASSERT(obj != nullptr); - u16 slot = next_free_slot; - if (slot >= generations.size()) { + const u16 slot = next_free_slot; + if (slot >= table_size) { LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); return ERR_HANDLE_TABLE_FULL; } next_free_slot = generations[slot]; - u16 generation = next_generation++; + const u16 generation = next_generation++; // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. // Horizon OS uses zero to represent an invalid handle, so skip to 1. @@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) { } ResultCode HandleTable::Close(Handle handle) { - if (!IsValid(handle)) + if (!IsValid(handle)) { return ERR_INVALID_HANDLE; + } - u16 slot = GetSlot(handle); + const u16 slot = GetSlot(handle); objects[slot] = nullptr; @@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) { } bool HandleTable::IsValid(Handle handle) const { - std::size_t slot = GetSlot(handle); - u16 generation = GetGeneration(handle); + const std::size_t slot = GetSlot(handle); + const u16 generation = GetGeneration(handle); - return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; + return slot < table_size && objects[slot] != nullptr && generations[slot] == generation; } SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { @@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { } void HandleTable::Clear() { - for (u16 i = 0; i < MAX_COUNT; ++i) { + for (u16 i = 0; i < table_size; ++i) { generations[i] = i + 1; objects[i] = nullptr; } diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h index 89a3bc740..44901391b 100644 --- a/src/core/hle/kernel/handle_table.h +++ b/src/core/hle/kernel/handle_table.h @@ -50,6 +50,20 @@ public: ~HandleTable(); /** + * Sets the number of handles that may be in use at one time + * for this handle table. + * + * @param handle_table_size The desired size to limit the handle table to. + * + * @returns an error code indicating if initialization was successful. + * If initialization was not successful, then ERR_OUT_OF_MEMORY + * will be returned. + * + * @pre handle_table_size must be within the range [0, 1024] + */ + ResultCode SetSize(s32 handle_table_size); + + /** * Allocates a handle for the given object. * @return The created Handle or one of the following errors: * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. @@ -104,13 +118,20 @@ private: std::array<u16, MAX_COUNT> generations; /** + * The limited size of the handle table. This can be specified by process + * capabilities in order to restrict the overall number of handles that + * can be created in a process instance + */ + u16 table_size = static_cast<u16>(MAX_COUNT); + + /** * Global counter of the number of created handles. Stored in `generations` when a handle is * created, and wraps around to 1 when it hits 0x8000. */ - u16 next_generation; + u16 next_generation = 1; /// Head of the free slots linked list. - u16 next_free_slot; + u16 next_free_slot = 0; }; } // namespace Kernel diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index c5aa19afa..8009150e0 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { vm_manager.Reset(metadata.GetAddressSpaceType()); const auto& caps = metadata.GetKernelCapabilities(); - return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); + const auto capability_init_result = + capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); + if (capability_init_result.IsError()) { + return capability_init_result; + } + + return handle_table.SetSize(capabilities.GetHandleTableSize()); } void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 3a2164b25..583e35b79 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp @@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() { interrupt_capabilities.set(); // Allow using the maximum possible amount of handles - handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); + handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT); // Allow all debugging capabilities. is_debuggable = true; @@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) { return ERR_RESERVED_VALUE; } - handle_table_size = (flags >> 16) & 0x3FF; + handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h index fbc8812a3..5cdd80747 100644 --- a/src/core/hle/kernel/process_capability.h +++ b/src/core/hle/kernel/process_capability.h @@ -156,7 +156,7 @@ public: } /// Gets the number of total allowable handles for the process' handle table. - u32 GetHandleTableSize() const { + s32 GetHandleTableSize() const { return handle_table_size; } @@ -252,7 +252,7 @@ private: u64 core_mask = 0; u64 priority_mask = 0; - u32 handle_table_size = 0; + s32 handle_table_size = 0; u32 kernel_version = 0; ProgramType program_type = ProgramType::SysModule; diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 7e0cc64a8..ea8f9d0bb 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -37,7 +37,7 @@ public: {8, &IAudioRenderer::SetRenderingTimeLimit, "SetRenderingTimeLimit"}, {9, &IAudioRenderer::GetRenderingTimeLimit, "GetRenderingTimeLimit"}, {10, &IAudioRenderer::RequestUpdateImpl, "RequestUpdateAuto"}, - {11, nullptr, "ExecuteAudioRendererRendering"}, + {11, &IAudioRenderer::ExecuteAudioRendererRendering, "ExecuteAudioRendererRendering"}, }; // clang-format on RegisterHandlers(functions); @@ -138,6 +138,17 @@ private: rb.Push(rendering_time_limit_percent); } + void ExecuteAudioRendererRendering(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_Audio, "called"); + + // This service command currently only reports an unsupported operation + // error code, or aborts. Given that, we just always return an error + // code in this case. + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultCode{ErrorModule::Audio, 201}); + } + Kernel::EventPair system_event; std::unique_ptr<AudioCore::AudioRenderer> renderer; u32 rendering_time_limit_percent = 100; @@ -235,7 +246,7 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") { {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, - {3, nullptr, "OpenAudioRendererAuto"}, + {3, &AudRenU::OpenAudioRendererAuto, "OpenAudioRendererAuto"}, {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, }; // clang-format on @@ -248,12 +259,7 @@ AudRenU::~AudRenU() = default; void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); - IPC::RequestParser rp{ctx}; - auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); - IPC::ResponseBuilder rb{ctx, 2, 0, 1}; - - rb.Push(RESULT_SUCCESS); - rb.PushIpcInterface<Audio::IAudioRenderer>(std::move(params)); + OpenAudioRendererImpl(ctx); } void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { @@ -262,20 +268,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); - buffer_sz += params.unknown_c * 1024; - buffer_sz += 0x940 * (params.unknown_c + 1); + buffer_sz += params.submix_count * 1024; + buffer_sz += 0x940 * (params.submix_count + 1); buffer_sz += 0x3F0 * params.voice_count; - buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); + buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10); buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); - buffer_sz += - Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * - (params.mix_buffer_count + 6), - 0x40); + buffer_sz += Common::AlignUp( + (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) * + (params.mix_buffer_count + 6), + 0x40); if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - u32 count = params.unknown_c + 1; + const u32 count = params.submix_count + 1; u64 node_count = Common::AlignUp(count, 0x40); - u64 node_state_buffer_sz = + const u64 node_state_buffer_sz = 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); u64 edge_matrix_buffer_sz = 0; node_count = Common::AlignUp(count * count, 0x40); @@ -289,19 +295,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - buffer_sz += 0xE0 * params.unknown_2c; + buffer_sz += 0xE0 * params.num_splitter_send_channels; buffer_sz += 0x20 * params.splitter_count; - buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); + buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10); } buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + ((params.voice_count * 256) | 0x40); - if (params.unknown_1c >= 1) { + if (params.performance_frame_count >= 1) { output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 16 * params.voice_count + 16) + 0x658) * - (params.unknown_1c + 1) + + (params.performance_frame_count + 1) + 0xc0, 0x40) + output_sz; @@ -325,6 +331,12 @@ void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { rb.PushIpcInterface<Audio::IAudioDevice>(); } +void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_Audio, "called"); + + OpenAudioRendererImpl(ctx); +} + void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); @@ -335,6 +347,15 @@ void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& c // based on the current revision } +void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IAudioRenderer>(params); +} + bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap switch (feature) { diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 3d63388fb..e55d25973 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -21,8 +21,11 @@ private: void OpenAudioRenderer(Kernel::HLERequestContext& ctx); void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); void GetAudioDeviceService(Kernel::HLERequestContext& ctx); + void OpenAudioRendererAuto(Kernel::HLERequestContext& ctx); void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); + void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); + enum class AudioFeatures : u32 { Splitter, }; diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index 6d897c842..7cc58db4c 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h @@ -15,7 +15,7 @@ namespace Kernel { class SharedMemory; } -namespace SM { +namespace Service::SM { class ServiceManager; } diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 21ccfe1f8..dbe7ee6e8 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect) { + const Common::Rectangle<int>& crop_rect) { VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index a45086e45..ace71169f 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -25,7 +25,7 @@ public: /// Performs a screen flip, drawing the buffer pointed to by the handle. void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect); + const Common::Rectangle<int>& crop_rect); private: std::shared_ptr<nvmap> nvmap_dev; diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index fc07d9bb8..4d150fc71 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { } void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect) { + const Common::Rectangle<int>& crop_rect) { auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { return buffer.slot == slot; }); ASSERT(itr != queue.end()); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index ab90d591e..e1ccb6171 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -67,14 +67,14 @@ public: Status status = Status::Free; IGBPBuffer igbp_buffer; BufferTransformFlags transform; - MathUtil::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect; }; void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); std::optional<u32> DequeueBuffer(u32 width, u32 height); const IGBPBuffer& RequestBuffer(u32 slot) const; void QueueBuffer(u32 slot, BufferTransformFlags transform, - const MathUtil::Rectangle<int>& crop_rect); + const Common::Rectangle<int>& crop_rect); std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); void ReleaseBuffer(u32 slot); u32 Query(QueryType type); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index b5d452db1..56f31e2ac 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -28,9 +28,13 @@ namespace Service::NVFlinger { constexpr std::size_t SCREEN_REFRESH_RATE = 60; constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); -NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) - : displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}}, - core_timing{core_timing} { +NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} { + displays.emplace_back(0, "Default"); + displays.emplace_back(1, "External"); + displays.emplace_back(2, "Edid"); + displays.emplace_back(3, "Internal"); + displays.emplace_back(4, "Null"); + // Schedule the screen composition events composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { @@ -55,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { // TODO(Subv): Currently we only support the Default display. ASSERT(name == "Default"); - const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const VI::Display& display) { return display.name == name; }); + const auto itr = + std::find_if(displays.begin(), displays.end(), + [&](const VI::Display& display) { return display.GetName() == name; }); if (itr == displays.end()) { return {}; } - return itr->id; + return itr->GetID(); } std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { @@ -71,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { return {}; } - ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment"); - const u64 layer_id = next_layer_id++; const u32 buffer_queue_id = next_buffer_queue_id++; - auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); - display->layers.emplace_back(layer_id, buffer_queue); - buffer_queues.emplace_back(std::move(buffer_queue)); + buffer_queues.emplace_back(buffer_queue_id, layer_id); + display->CreateLayer(layer_id, buffer_queues.back()); return layer_id; } @@ -88,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co return {}; } - return layer->buffer_queue->GetId(); + return layer->GetBufferQueue().GetId(); } Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { @@ -98,12 +100,20 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i return nullptr; } - return display->vsync_event.readable; + return display->GetVSyncEvent(); } -std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { +BufferQueue& NVFlinger::FindBufferQueue(u32 id) { const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), - [&](const auto& queue) { return queue->GetId() == id; }); + [id](const auto& queue) { return queue.GetId() == id; }); + + ASSERT(itr != buffer_queues.end()); + return *itr; +} + +const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const { + const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), + [id](const auto& queue) { return queue.GetId() == id; }); ASSERT(itr != buffer_queues.end()); return *itr; @@ -112,7 +122,7 @@ std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { VI::Display* NVFlinger::FindDisplay(u64 display_id) { const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const VI::Display& display) { return display.id == display_id; }); + [&](const VI::Display& display) { return display.GetID() == display_id; }); if (itr == displays.end()) { return nullptr; @@ -124,7 +134,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) { const VI::Display* NVFlinger::FindDisplay(u64 display_id) const { const auto itr = std::find_if(displays.begin(), displays.end(), - [&](const VI::Display& display) { return display.id == display_id; }); + [&](const VI::Display& display) { return display.GetID() == display_id; }); if (itr == displays.end()) { return nullptr; @@ -140,14 +150,7 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { return nullptr; } - const auto itr = std::find_if(display->layers.begin(), display->layers.end(), - [&](const VI::Layer& layer) { return layer.id == layer_id; }); - - if (itr == display->layers.end()) { - return nullptr; - } - - return &*itr; + return display->FindLayer(layer_id); } const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { @@ -157,33 +160,24 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { return nullptr; } - const auto itr = std::find_if(display->layers.begin(), display->layers.end(), - [&](const VI::Layer& layer) { return layer.id == layer_id; }); - - if (itr == display->layers.end()) { - return nullptr; - } - - return &*itr; + return display->FindLayer(layer_id); } void NVFlinger::Compose() { for (auto& display : displays) { // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); + SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. - if (display.layers.empty()) + if (!display.HasLayers()) continue; // TODO(Subv): Support more than 1 layer. - ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); - - VI::Layer& layer = display.layers[0]; - auto& buffer_queue = layer.buffer_queue; + VI::Layer& layer = display.GetLayer(0); + auto& buffer_queue = layer.GetBufferQueue(); // Search for a queued buffer and acquire it - auto buffer = buffer_queue->AcquireBuffer(); + auto buffer = buffer_queue.AcquireBuffer(); MicroProfileFlip(); @@ -208,7 +202,7 @@ void NVFlinger::Compose() { igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); - buffer_queue->ReleaseBuffer(buffer->get().slot); + buffer_queue.ReleaseBuffer(buffer->get().slot); } } diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 2e000af91..c0a83fffb 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -28,8 +28,8 @@ class Module; } // namespace Service::Nvidia namespace Service::VI { -struct Display; -struct Layer; +class Display; +class Layer; } // namespace Service::VI namespace Service::NVFlinger { @@ -65,7 +65,10 @@ public: Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; /// Obtains a buffer queue identified by the ID. - std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; + BufferQueue& FindBufferQueue(u32 id); + + /// Obtains a buffer queue identified by the ID. + const BufferQueue& FindBufferQueue(u32 id) const; /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when /// finished. @@ -87,7 +90,7 @@ private: std::shared_ptr<Nvidia::Module> nvdrv; std::vector<VI::Display> displays; - std::vector<std::shared_ptr<BufferQueue>> buffer_queues; + std::vector<BufferQueue> buffer_queues; /// Id to use for the next layer that is created, this counter is shared among all displays. u64 next_layer_id = 1; diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index a108e468f..01d80311b 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -2,8 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <utility> + #include <fmt/format.h> +#include "common/assert.h" #include "core/core.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/service/vi/display/vi_display.h" @@ -19,4 +23,49 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} { Display::~Display() = default; +Layer& Display::GetLayer(std::size_t index) { + return layers.at(index); +} + +const Layer& Display::GetLayer(std::size_t index) const { + return layers.at(index); +} + +Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const { + return vsync_event.readable; +} + +void Display::SignalVSyncEvent() { + vsync_event.writable->Signal(); +} + +void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) { + // TODO(Subv): Support more than 1 layer. + ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); + + layers.emplace_back(id, buffer_queue); +} + +Layer* Display::FindLayer(u64 id) { + const auto itr = std::find_if(layers.begin(), layers.end(), + [id](const VI::Layer& layer) { return layer.GetID() == id; }); + + if (itr == layers.end()) { + return nullptr; + } + + return &*itr; +} + +const Layer* Display::FindLayer(u64 id) const { + const auto itr = std::find_if(layers.begin(), layers.end(), + [id](const VI::Layer& layer) { return layer.GetID() == id; }); + + if (itr == layers.end()) { + return nullptr; + } + + return &*itr; +} + } // namespace Service::VI diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index df44db306..2acd46ff8 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -10,14 +10,84 @@ #include "common/common_types.h" #include "core/hle/kernel/writable_event.h" +namespace Service::NVFlinger { +class BufferQueue; +} + namespace Service::VI { -struct Layer; +class Layer; -struct Display { +/// Represents a single display type +class Display { +public: + /// Constructs a display with a given unique ID and name. + /// + /// @param id The unique ID for this display. + /// @param name The name for this display. + /// Display(u64 id, std::string name); ~Display(); + Display(const Display&) = delete; + Display& operator=(const Display&) = delete; + + Display(Display&&) = default; + Display& operator=(Display&&) = default; + + /// Gets the unique ID assigned to this display. + u64 GetID() const { + return id; + } + + /// Gets the name of this display + const std::string& GetName() const { + return name; + } + + /// Whether or not this display has any layers added to it. + bool HasLayers() const { + return !layers.empty(); + } + + /// Gets a layer for this display based off an index. + Layer& GetLayer(std::size_t index); + + /// Gets a layer for this display based off an index. + const Layer& GetLayer(std::size_t index) const; + + /// Gets the readable vsync event. + Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const; + + /// Signals the internal vsync event. + void SignalVSyncEvent(); + + /// Creates and adds a layer to this display with the given ID. + /// + /// @param id The ID to assign to the created layer. + /// @param buffer_queue The buffer queue for the layer instance to use. + /// + void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue); + + /// Attempts to find a layer with the given ID. + /// + /// @param id The layer ID. + /// + /// @returns If found, the Layer instance with the given ID. + /// If not found, then nullptr is returned. + /// + Layer* FindLayer(u64 id); + + /// Attempts to find a layer with the given ID. + /// + /// @param id The layer ID. + /// + /// @returns If found, the Layer instance with the given ID. + /// If not found, then nullptr is returned. + /// + const Layer* FindLayer(u64 id) const; + +private: u64 id; std::string name; diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp index 3a83e5b95..954225c26 100644 --- a/src/core/hle/service/vi/layer/vi_layer.cpp +++ b/src/core/hle/service/vi/layer/vi_layer.cpp @@ -6,8 +6,7 @@ namespace Service::VI { -Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue) - : id{id}, buffer_queue{std::move(queue)} {} +Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {} Layer::~Layer() = default; diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h index df328e09f..c6bfd01f6 100644 --- a/src/core/hle/service/vi/layer/vi_layer.h +++ b/src/core/hle/service/vi/layer/vi_layer.h @@ -4,8 +4,6 @@ #pragma once -#include <memory> - #include "common/common_types.h" namespace Service::NVFlinger { @@ -14,12 +12,41 @@ class BufferQueue; namespace Service::VI { -struct Layer { - Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue); +/// Represents a single display layer. +class Layer { +public: + /// Constructs a layer with a given ID and buffer queue. + /// + /// @param id The ID to assign to this layer. + /// @param queue The buffer queue for this layer to use. + /// + Layer(u64 id, NVFlinger::BufferQueue& queue); ~Layer(); + Layer(const Layer&) = delete; + Layer& operator=(const Layer&) = delete; + + Layer(Layer&&) = default; + Layer& operator=(Layer&&) = delete; + + /// Gets the ID for this layer. + u64 GetID() const { + return id; + } + + /// Gets a reference to the buffer queue this layer is using. + NVFlinger::BufferQueue& GetBufferQueue() { + return buffer_queue; + } + + /// Gets a const reference to the buffer queue this layer is using. + const NVFlinger::BufferQueue& GetBufferQueue() const { + return buffer_queue; + } + +private: u64 id; - std::shared_ptr<NVFlinger::BufferQueue> buffer_queue; + NVFlinger::BufferQueue& buffer_queue; }; } // namespace Service::VI diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index a317a2885..a975767bb 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -420,7 +420,7 @@ public: u32_le fence_is_valid; std::array<Fence, 2> fences; - MathUtil::Rectangle<int> GetCropRect() const { + Common::Rectangle<int> GetCropRect() const { return {crop_left, crop_top, crop_right, crop_bottom}; } }; @@ -525,7 +525,7 @@ private: LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, static_cast<u32>(transaction), flags); - auto buffer_queue = nv_flinger->FindBufferQueue(id); + auto& buffer_queue = nv_flinger->FindBufferQueue(id); if (transaction == TransactionId::Connect) { IGBPConnectRequestParcel request{ctx.ReadBuffer()}; @@ -538,7 +538,7 @@ private: } else if (transaction == TransactionId::SetPreallocatedBuffer) { IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; - buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); + buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer); IGBPSetPreallocatedBufferResponseParcel response{}; ctx.WriteBuffer(response.Serialize()); @@ -546,7 +546,7 @@ private: IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; const u32 width{request.data.width}; const u32 height{request.data.height}; - std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); + std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); if (slot) { // Buffer is available @@ -559,8 +559,8 @@ private: [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) { // Repeat TransactParcel DequeueBuffer when a buffer is available - auto buffer_queue = nv_flinger->FindBufferQueue(id); - std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); + auto& buffer_queue = nv_flinger->FindBufferQueue(id); + std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height); ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); IGBPDequeueBufferResponseParcel response{*slot}; @@ -568,28 +568,28 @@ private: IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); }, - buffer_queue->GetWritableBufferWaitEvent()); + buffer_queue.GetWritableBufferWaitEvent()); } } else if (transaction == TransactionId::RequestBuffer) { IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; - auto& buffer = buffer_queue->RequestBuffer(request.slot); + auto& buffer = buffer_queue.RequestBuffer(request.slot); IGBPRequestBufferResponseParcel response{buffer}; ctx.WriteBuffer(response.Serialize()); } else if (transaction == TransactionId::QueueBuffer) { IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; - buffer_queue->QueueBuffer(request.data.slot, request.data.transform, - request.data.GetCropRect()); + buffer_queue.QueueBuffer(request.data.slot, request.data.transform, + request.data.GetCropRect()); IGBPQueueBufferResponseParcel response{1280, 720}; ctx.WriteBuffer(response.Serialize()); } else if (transaction == TransactionId::Query) { IGBPQueryRequestParcel request{ctx.ReadBuffer()}; - u32 value = - buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); + const u32 value = + buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); IGBPQueryResponseParcel response{value}; ctx.WriteBuffer(response.Serialize()); @@ -629,12 +629,12 @@ private: LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); - const auto buffer_queue = nv_flinger->FindBufferQueue(id); + const auto& buffer_queue = nv_flinger->FindBufferQueue(id); // TODO(Subv): Find out what this actually is. IPC::ResponseBuilder rb{ctx, 2, 1}; rb.Push(RESULT_SUCCESS); - rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); + rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent()); } std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; @@ -752,6 +752,7 @@ public: {1102, nullptr, "GetDisplayResolution"}, {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, {2011, nullptr, "DestroyManagedLayer"}, + {2012, nullptr, "CreateStrayLayer"}, {2050, nullptr, "CreateIndirectLayer"}, {2051, nullptr, "DestroyIndirectLayer"}, {2052, nullptr, "CreateIndirectProducerEndPoint"}, diff --git a/src/core/memory.cpp b/src/core/memory.cpp index e9166dbd9..f809567b6 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa FlushMode::FlushAndInvalidate); VAddr end = base + size; - while (base != end) { - ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); + ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", + base + page_table.pointers.size()); - page_table.attributes[base] = type; - page_table.pointers[base] = memory; + std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - base += 1; - if (memory != nullptr) + if (memory == nullptr) { + std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); + } else { + while (base != end) { + page_table.pointers[base] = memory; + + base += 1; memory += PAGE_SIZE; + } } } diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp index 9570c060e..6d96d4019 100644 --- a/src/input_common/motion_emu.cpp +++ b/src/input_common/motion_emu.cpp @@ -32,12 +32,12 @@ public: } void BeginTilt(int x, int y) { - mouse_origin = Math::MakeVec(x, y); + mouse_origin = Common::MakeVec(x, y); is_tilting = true; } void Tilt(int x, int y) { - auto mouse_move = Math::MakeVec(x, y) - mouse_origin; + auto mouse_move = Common::MakeVec(x, y) - mouse_origin; if (is_tilting) { std::lock_guard<std::mutex> guard(tilt_mutex); if (mouse_move.x == 0 && mouse_move.y == 0) { @@ -45,7 +45,7 @@ public: } else { tilt_direction = mouse_move.Cast<float>(); tilt_angle = - std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); + std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f); } } } @@ -56,7 +56,7 @@ public: is_tilting = false; } - std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() { std::lock_guard<std::mutex> guard(status_mutex); return status; } @@ -66,17 +66,17 @@ private: const std::chrono::steady_clock::duration update_duration; const float sensitivity; - Math::Vec2<int> mouse_origin; + Common::Vec2<int> mouse_origin; std::mutex tilt_mutex; - Math::Vec2<float> tilt_direction; + Common::Vec2<float> tilt_direction; float tilt_angle = 0; bool is_tilting = false; Common::Event shutdown_event; - std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; + std::tuple<Common::Vec3<float>, Common::Vec3<float>> status; std::mutex status_mutex; // Note: always keep the thread declaration at the end so that other objects are initialized @@ -85,8 +85,8 @@ private: void MotionEmuThread() { auto update_time = std::chrono::steady_clock::now(); - Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); - Math::Quaternion<float> old_q; + Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0); + Common::Quaternion<float> old_q; while (!shutdown_event.WaitUntil(update_time)) { update_time += update_duration; @@ -96,18 +96,18 @@ private: std::lock_guard<std::mutex> guard(tilt_mutex); // Find the quaternion describing current 3DS tilting - q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), - tilt_angle); + q = Common::MakeQuaternion( + Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle); } auto inv_q = q.Inverse(); // Set the gravity vector in world space - auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); + auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f); // Find the angular rate vector in world space auto angular_rate = ((q - old_q) * inv_q).xyz * 2; - angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; + angular_rate *= 1000 / update_millisecond / Common::PI * 180; // Transform the two vectors from world space to 3DS space gravity = QuaternionRotate(inv_q, gravity); @@ -131,7 +131,7 @@ public: device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); } - std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { + std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override { return device->GetStatus(); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6036d6ed3..3e9d2b3be 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -104,6 +104,8 @@ add_library(video_core STATIC if (ENABLE_VULKAN) target_sources(video_core PRIVATE renderer_vulkan/declarations.h + renderer_vulkan/vk_buffer_cache.cpp + renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h renderer_vulkan/vk_memory_manager.cpp @@ -111,7 +113,9 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_resource_manager.cpp renderer_vulkan/vk_resource_manager.h renderer_vulkan/vk_scheduler.cpp - renderer_vulkan/vk_scheduler.h) + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h) target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ec1a57226..540dcc52c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() { const u32 src_blit_y2{ static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; - const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; - const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, - regs.blit_dst_x + regs.blit_dst_width, - regs.blit_dst_y + regs.blit_dst_height}; + const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; + const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, + regs.blit_dst_x + regs.blit_dst_width, + regs.blit_dst_y + regs.blit_dst_height}; if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { UNIMPLEMENTED(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2d2136067..144e7fa82 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { auto debug_context = system.GetGPUDebugContext(); + const u32 method = method_call.method; + // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { - ASSERT(method_call.method == executing_macro + 1); + ASSERT(method == executing_macro + 1); } // Methods after 0xE00 are special, they're actually triggers for some microcode that was // uploaded to the GPU during initialization. - if (method_call.method >= MacroRegistersStart) { + if (method >= MacroRegistersStart) { // We're trying to execute a macro if (executing_macro == 0) { // A macro call must begin by writing the macro method's register, not its argument. - ASSERT_MSG((method_call.method % 2) == 0, + ASSERT_MSG((method % 2) == 0, "Can't start macro execution by writing to the ARGS register"); - executing_macro = method_call.method; + executing_macro = method; } macro_params.push_back(method_call.argument); @@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { return; } - ASSERT_MSG(method_call.method < Regs::NUM_REGS, + ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - if (regs.reg_array[method_call.method] != method_call.argument) { - regs.reg_array[method_call.method] = method_call.argument; + if (regs.reg_array[method] != method_call.argument) { + regs.reg_array[method] = method_call.argument; // Color buffers constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); - if (method_call.method >= first_rt_reg && - method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { - const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; - dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); + if (method >= first_rt_reg && + method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { + const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; + dirty_flags.color_buffer.set(rt_index); } // Zeta buffer constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); - if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || - method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || - (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && - method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { + if (method == MAXWELL3D_REG_INDEX(zeta_enable) || + method == MAXWELL3D_REG_INDEX(zeta_width) || + method == MAXWELL3D_REG_INDEX(zeta_height) || + (method >= MAXWELL3D_REG_INDEX(zeta) && + method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { dirty_flags.zeta_buffer = true; } // Shader constexpr u32 shader_registers_count = sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); - if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && - method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { + if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && + method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { dirty_flags.shaders = true; } // Vertex format - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && - method_call.method < - MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { dirty_flags.vertex_attrib_format = true; } // Vertex buffer - if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && - method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { - dirty_flags.vertex_array |= - 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); - } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && - method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { - dirty_flags.vertex_array |= - 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays)); + if (method >= MAXWELL3D_REG_INDEX(vertex_array) && + method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); + } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && + method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { + dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); + } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && + method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { + dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); } } - switch (method_call.method) { + switch (method) { case MAXWELL3D_REG_INDEX(macros.data): { ProcessMacroUpload(method_call.argument); break; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0e3873ffd..7fbf1026e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -5,8 +5,10 @@ #pragma once #include <array> +#include <bitset> #include <unordered_map> #include <vector> + #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" @@ -503,7 +505,7 @@ public: f32 translate_z; INSERT_PADDING_WORDS(2); - MathUtil::Rectangle<s32> GetRect() const { + Common::Rectangle<s32> GetRect() const { return { GetX(), // left GetY() + GetHeight(), // top @@ -1094,19 +1096,18 @@ public: MemoryManager& memory_manager; struct DirtyFlags { - u8 color_buffer = 0xFF; - bool zeta_buffer = true; - - bool shaders = true; + std::bitset<8> color_buffer{0xFF}; + std::bitset<32> vertex_array{0xFFFFFFFF}; bool vertex_attrib_format = true; - u32 vertex_array = 0xFFFFFFFF; + bool zeta_buffer = true; + bool shaders = true; void OnMemoryWrite() { - color_buffer = 0xFF; zeta_buffer = true; shaders = true; - vertex_array = 0xFFFFFFFF; + color_buffer.set(); + vertex_array.set(); } }; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 1f425f90b..252592edd 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -376,9 +376,9 @@ enum class R2pMode : u64 { }; enum class IpaInterpMode : u64 { - Linear = 0, - Perspective = 1, - Flat = 2, + Pass = 0, + Multiply = 1, + Constant = 2, Sc = 3, }; diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index cf2b76ff6..e86a7f04a 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -16,6 +16,13 @@ enum class OutputTopology : u32 { TriangleStrip = 7, }; +enum class AttributeUse : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + // Documentation in: // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture struct Header { @@ -84,9 +91,15 @@ struct Header { } vtg; struct { - INSERT_PADDING_BYTES(3); // ImapSystemValuesA - INSERT_PADDING_BYTES(1); // ImapSystemValuesB - INSERT_PADDING_BYTES(32); // ImapGenericVector[32] + INSERT_PADDING_BYTES(3); // ImapSystemValuesA + INSERT_PADDING_BYTES(1); // ImapSystemValuesB + union { + BitField<0, 2, AttributeUse> x; + BitField<2, 2, AttributeUse> y; + BitField<4, 2, AttributeUse> w; + BitField<6, 2, AttributeUse> z; + u8 raw; + } imap_generic_vector[32]; INSERT_PADDING_BYTES(2); // ImapColor INSERT_PADDING_BYTES(2); // ImapSystemValuesC INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] @@ -103,6 +116,28 @@ struct Header { const u32 bit = render_target * 4 + component; return omap.target & (1 << bit); } + AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { + return static_cast<AttributeUse>( + (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); + } + AttributeUse GetAttributeUse(u32 attribute) const { + AttributeUse result = AttributeUse::Unused; + for (u32 i = 0; i < 4; i++) { + const auto index = GetAttributeIndexUse(attribute, i); + if (index == AttributeUse::Unused) { + continue; + } + if (result == AttributeUse::Unused || result == index) { + result = index; + continue; + } + LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); + if (index == AttributeUse::Perspective) { + result = index; + } + } + return result; + } } ps; }; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 0f5bfdcbf..6313702f2 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -100,7 +100,7 @@ struct FramebufferConfig { using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; TransformFlags transform_flags; - MathUtil::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect; }; namespace Engines { diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index bcf0c15a4..a7bcf26fb 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -129,6 +129,15 @@ protected: return ++modified_ticks; } + /// Flushes the specified object, updating appropriate cache state as needed + void FlushObject(const T& object) { + if (!object->IsDirty()) { + return; + } + object->Flush(); + object->MarkAsModified(false, *this); + } + private: /// Returns a list of cached objects from the specified memory region, ordered by access time std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { @@ -154,15 +163,6 @@ private: return objects; } - /// Flushes the specified object, updating appropriate cache state as needed - void FlushObject(const T& object) { - if (!object->IsDirty()) { - return; - } - object->Flush(); - object->MarkAsModified(false, *this); - } - using ObjectSet = std::set<T>; using ObjectCache = std::unordered_map<VAddr, T>; using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index b2a223705..6a1dc9cf6 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,8 +47,8 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12d876120..c8c1d6911 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -102,8 +102,8 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window}, + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { } // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); state.draw.vertex_array = vao_entry.handle; return vao_entry.handle; @@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; - if (!gpu.dirty_flags.vertex_array) + if (gpu.dirty_flags.vertex_array.none()) return; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (~gpu.dirty_flags.vertex_array & (1u << index)) + if (!gpu.dirty_flags.vertex_array[index]) continue; const auto& vertex_array = regs.vertex_array[index]; @@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { } } - gpu.dirty_flags.vertex_array = 0; + gpu.dirty_flags.vertex_array.reset(); } DrawParameters RasterizerOpenGL::SetupDraw() { @@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && - !gpu.dirty_flags.zeta_buffer) { + if (fb_config_state == current_framebuffer_config_state && + gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). This is done because the guest registers may not change but the // host framebuffer may contain different attachments @@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() { // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); - bool invalidate = buffer_cache.Map(buffer_size); + const bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); } const GLuint vao = SetupVertexFormat(); @@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + res_cache.SignalPreDrawCall(); + // Execute draw call params.DispatchDraw(); + res_cache.SignalPostDrawCall(); + // Disable scissor test state.viewports[0].scissor.enabled = false; @@ -779,8 +783,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; @@ -1034,7 +1038,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { for (std::size_t i = 0; i < viewport_count; i++) { auto& viewport = current_state.viewports[i]; const auto& src = regs.viewports[i]; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; + const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; viewport.x = viewport_rect.left; viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 258d62259..2f0524f85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -62,8 +62,8 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) override; + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 74200914e..b5a9722f9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <optional> #include <glad/glad.h> #include "common/alignment.h" @@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } -MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { +Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { // ASTC formats must stop at the ATSC block size boundary @@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) // alternatives. This signals a bug on those functions. const auto width = static_cast<GLsizei>(params.MipWidth(0)); const auto height = static_cast<GLsizei>(params.MipHeight(0)); + memory_size = params.MemorySize(); + reinterpreted = false; const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); gl_internal_format = format_tuple.internal_format; @@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; const auto& regs{gpu.regs}; - if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { - return last_color_buffers[index]; + if (!gpu.dirty_flags.color_buffer[index]) { + return current_color_buffers[index]; } - gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); + gpu.dirty_flags.color_buffer.reset(index); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); if (index >= regs.rt_control.count) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return last_color_buffers[index] = GetSurface(color_params, preserve_contents); + return current_color_buffers[index] = GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->LoadGLBuffer(); surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); + surface->MarkForReload(false); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { @@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres Surface surface{TryGet(params.addr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is + // Use the cached surface as-is unless it's not synced with memory + if (surface->MustReload()) + LoadSurface(surface); return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); + UnregisterSurface(surface); Register(new_surface); + if (new_surface->IsUploaded()) { + RegisterReinterpretSurface(new_surface); + } return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); + UnregisterSurface(surface); } } @@ -1062,8 +1071,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, } static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1193,7 +1202,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, void RasterizerCacheOpenGL::FermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); @@ -1257,7 +1266,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - FastLayeredCopySurface(old_surface, new_surface); + if (old_params.pixel_format == new_params.pixel_format) + FastLayeredCopySurface(old_surface, new_surface); + else { + AccurateCopySurface(old_surface, new_surface); + } break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", @@ -1286,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params return {}; } +static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, + u32 height) { + for (u32 i = 0; i < params.max_mip_level; i++) { + if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { + return {i}; + } + } + return {}; +} + +static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size = params.LayerMemorySize(); + VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); + for (u32 i = 0; i < params.depth; i++) { + if (start == addr) { + return {i}; + } + start += size; + } + return {}; +} + +static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + const std::size_t src_memory_size = src_params.size_in_bytes; + const std::optional<u32> level = + TryFindBestMipMap(src_memory_size, dst_params, src_params.height); + if (level.has_value()) { + if (src_params.width == dst_params.MipWidthGobAligned(*level) && + src_params.height == dst_params.MipHeight(*level) && + src_params.block_height >= dst_params.MipBlockHeight(*level)) { + const std::optional<u32> slot = + TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); + if (slot.has_value()) { + glCopyImageSubData(render_surface->Texture().handle, + SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, + blitted_surface->Texture().handle, + SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, + dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); + blitted_surface->MarkAsModified(true, cache); + return true; + } + } + } + return false; +} + +static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { + const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); + if (bound2 > bound1) + return true; + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.component_type != src_params.component_type); +} + +static bool IsReinterpretInvalidSecond(const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.height > src_params.height && dst_params.width > src_params.width); +} + +bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, + Surface intersect) { + if (IsReinterpretInvalid(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { + if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + FlushObject(intersect); + FlushObject(triggering_surface); + intersect->MarkForReload(true); + } + return true; +} + +void RasterizerCacheOpenGL::SignalPreDrawCall() { + if (texception && GLAD_GL_ARB_texture_barrier) { + glTextureBarrier(); + } + texception = false; +} + +void RasterizerCacheOpenGL::SignalPostDrawCall() { + for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { + if (current_color_buffers[i] != nullptr) { + Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); + if (intersect != nullptr) { + PartialReinterpretSurface(current_color_buffers[i], intersect); + texception = true; + } + } + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 89d733c50..797bbdc9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -28,12 +28,13 @@ namespace OpenGL { class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; +using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; using SurfaceTarget = VideoCore::Surface::SurfaceTarget; using SurfaceType = VideoCore::Surface::SurfaceType; using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct SurfaceParams { enum class SurfaceClass { @@ -71,7 +72,7 @@ struct SurfaceParams { } /// Returns the rectangle corresponding to this surface - MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; + Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; /// Returns the total size of this surface in bytes, adjusted for compression std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { @@ -140,10 +141,18 @@ struct SurfaceParams { return offset; } + std::size_t GetMipmapSingleSize(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, false, is_layered); + } + u32 MipWidth(u32 mip_level) const { return std::max(1U, width >> mip_level); } + u32 MipWidthGobAligned(u32 mip_level) const { + return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); + } + u32 MipHeight(u32 mip_level) const { return std::max(1U, height >> mip_level); } @@ -346,6 +355,10 @@ public: return cached_size_in_bytes; } + std::size_t GetMemorySize() const { + return memory_size; + } + void Flush() override { FlushGLBuffer(); } @@ -395,6 +408,26 @@ public: Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w); + void MarkReinterpreted() { + reinterpreted = true; + } + + bool IsReinterpreted() const { + return reinterpreted; + } + + void MarkForReload(bool reload) { + must_reload = reload; + } + + bool MustReload() const { + return must_reload; + } + + bool IsUploaded() const { + return params.identity == SurfaceParams::SurfaceClass::Uploaded; + } + private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); @@ -408,6 +441,9 @@ private: GLenum gl_internal_format{}; std::size_t cached_size_in_bytes{}; std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + std::size_t memory_size; + bool reinterpreted = false; + bool must_reload = false; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { @@ -430,8 +466,11 @@ public: /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect); + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect); + + void SignalPreDrawCall(); + void SignalPostDrawCall(); private: void LoadSurface(const Surface& surface); @@ -449,6 +488,10 @@ private: /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); + // Partialy reinterpret a surface based on a triggering_surface that collides with it. + // returns true if the reinterpret was successful, false in case it was not. + bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); + /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); @@ -465,12 +508,50 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + bool texception = false; + /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one /// using the new format. OGLBuffer copy_pbo; - std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + + using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; + using SurfaceInterval = typename SurfaceIntervalCache::interval_type; + + static auto GetReinterpretInterval(const Surface& object) { + return SurfaceInterval::right_open(object->GetAddr() + 1, + object->GetAddr() + object->GetMemorySize() - 1); + } + + // Reinterpreted surfaces are very fragil as the game may keep rendering into them. + SurfaceIntervalCache reinterpreted_surfaces; + + void RegisterReinterpretSurface(Surface reinterpret_surface) { + auto interval = GetReinterpretInterval(reinterpret_surface); + reinterpreted_surfaces.insert({interval, reinterpret_surface}); + reinterpret_surface->MarkReinterpreted(); + } + + Surface CollideOnReinterpretedSurface(VAddr addr) const { + const SurfaceInterval interval{addr}; + for (auto& pair : + boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { + return pair.second; + } + return nullptr; + } + + /// Unregisters an object from the cache + void UnregisterSurface(const Surface& object) { + if (object->IsReinterpreted()) { + auto interval = GetReinterpretInterval(object); + reinterpreted_surfaces.erase(interval); + } + Unregister(object); + } }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index db18f4dbe..72ff6ac6a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -20,6 +20,7 @@ namespace OpenGL::GLShader { using Tegra::Shader::Attribute; +using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; @@ -288,34 +289,22 @@ private: code.AddNewLine(); } - std::string GetInputFlags(const IpaMode& input_mode) { - const IpaSampleMode sample_mode = input_mode.sampling_mode; - const IpaInterpMode interp_mode = input_mode.interpolation_mode; + std::string GetInputFlags(AttributeUse attribute) { std::string out; - switch (interp_mode) { - case IpaInterpMode::Flat: + switch (attribute) { + case AttributeUse::Constant: out += "flat "; break; - case IpaInterpMode::Linear: + case AttributeUse::ScreenLinear: out += "noperspective "; break; - case IpaInterpMode::Perspective: + case AttributeUse::Perspective: // Default, Smooth break; default: - UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); - } - switch (sample_mode) { - case IpaSampleMode::Centroid: - // It can be implemented with the "centroid " keyword in GLSL - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); - break; - case IpaSampleMode::Default: - // Default, n/a - break; - default: - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); + LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); + UNREACHABLE(); } return out; } @@ -324,16 +313,11 @@ private: const auto& attributes = ir.GetInputAttributes(); for (const auto element : attributes) { const Attribute::Index index = element.first; - const IpaMode& input_mode = *element.second.begin(); if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { // Skip when it's not a generic attribute continue; } - ASSERT(element.second.size() > 0); - UNIMPLEMENTED_IF_MSG(element.second.size() > 1, - "Multiple input flag modes are not supported in GLSL"); - // TODO(bunnei): Use proper number of elements for these u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); if (stage != ShaderStage::Vertex) { @@ -345,8 +329,14 @@ private: if (stage == ShaderStage::Geometry) { attr = "gs_" + attr + "[]"; } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + - GetInputFlags(input_mode) + "in vec4 " + attr + ';'); + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode = + header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); + suffix = GetInputFlags(input_mode); + } + code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + + attr + ';'); } if (!attributes.empty()) code.AddNewLine(); @@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st return {decompiler.GetResult(), decompiler.GetShaderEntries()}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 81882822b..82fc4d44b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <cstring> #include <fmt/format.h> #include <lz4.h> diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 04e1db911..7d96649af 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in vec4 position; +layout (location = 0) in noperspective vec4 position; layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; @@ -172,4 +172,4 @@ void main() { return {out, program.second}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 272fc2e8e..e60b2eb44 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -257,6 +257,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { texture.width = framebuffer.width; texture.height = framebuffer.height; + texture.pixel_format = framebuffer.pixel_format; GLint internal_format; switch (framebuffer.pixel_format) { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7e13e566b..c168fa89e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -39,7 +39,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; + const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; @@ -102,7 +102,7 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; - MathUtil::Rectangle<int> framebuffer_crop_rect; + Common::Rectangle<int> framebuffer_crop_rect; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp new file mode 100644 index 000000000..18b7b94a1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -0,0 +1,116 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <memory> +#include <optional> +#include <tuple> + +#include "common/alignment.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) + : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager} { + const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer; + const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | + vk::AccessFlagBits::eUniformRead; + stream_buffer = + std::make_unique<VKStreamBuffer>(device, memory_manager, scheduler, size, usage, access, + vk::PipelineStageFlagBits::eAllCommands); + buffer_handle = stream_buffer->GetBuffer(); +} + +VKBufferCache::~VKBufferCache() = default; + +u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, + bool cache) { + const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; + ASSERT(cpu_addr); + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + cache &= size >= 2048; + + if (cache) { + if (auto entry = TryGet(*cpu_addr); entry) { + if (entry->size >= size && entry->alignment == alignment) { + return entry->offset; + } + Unregister(entry); + } + } + + AlignBuffer(alignment); + const u64 uploaded_offset = buffer_offset; + + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + if (cache) { + auto entry = std::make_shared<CachedBufferEntry>(); + entry->offset = uploaded_offset; + entry->size = size; + entry->alignment = alignment; + entry->addr = *cpu_addr; + Register(entry); + } + + return uploaded_offset; +} + +u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { + AlignBuffer(alignment); + std::memcpy(buffer_ptr, raw_pointer, size); + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return uploaded_offset; +} + +std::tuple<u8*, u64> VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { + AlignBuffer(alignment); + u8* const uploaded_ptr = buffer_ptr; + const u64 uploaded_offset = buffer_offset; + + buffer_ptr += size; + buffer_offset += size; + return {uploaded_ptr, uploaded_offset}; +} + +void VKBufferCache::Reserve(std::size_t max_size) { + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); + buffer_offset = buffer_offset_base; + + if (invalidate) { + InvalidateAll(); + } +} + +VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { + return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); +} + +void VKBufferCache::AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h new file mode 100644 index 000000000..d8e916f31 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -0,0 +1,87 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <tuple> + +#include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKMemoryManager; +class VKStreamBuffer; + +struct CachedBufferEntry final : public RasterizerCacheObject { + VAddr GetAddr() const override { + return addr; + } + + std::size_t GetSizeInBytes() const override { + return size; + } + + // We do not have to flush this cache as things in it are never modified by us. + void Flush() override {} + + VAddr addr; + std::size_t size; + u64 offset; + std::size_t alignment; +}; + +class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +public: + explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + ~VKBufferCache(); + + /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been + /// allocated. + u64 UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, + bool cache = true); + + /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. + u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); + + /// Reserves memory to be used by host's CPU. Returns mapped address and offset. + std::tuple<u8*, u64> ReserveMemory(std::size_t size, u64 alignment = 4); + + /// Reserves a region of memory to be used in subsequent upload/reserve operations. + void Reserve(std::size_t max_size); + + /// Ensures that the set data is sent to the device. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); + + /// Returns the buffer cache handle. + vk::Buffer GetBuffer() const { + return buffer_handle; + } + +private: + void AlignBuffer(std::size_t alignment); + + Tegra::MemoryManager& tegra_memory_manager; + + std::unique_ptr<VKStreamBuffer> stream_buffer; + vk::Buffer buffer_handle; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 17ee93b91..0451babbf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, u64 begin, u64 end) - : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} + : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 1678463c7..a1e117443 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) { protected_resources.push_back(resource); } -void VKFence::Unprotect(const VKResource* resource) { +void VKFence::Unprotect(VKResource* resource) { const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); - if (it != protected_resources.end()) { - protected_resources.erase(it); - } + ASSERT(it != protected_resources.end()); + + resource->OnFenceRemoval(this); + protected_resources.erase(it); } VKFenceWatch::VKFenceWatch() = default; @@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() { } void VKFenceWatch::Wait() { - if (!fence) { + if (fence == nullptr) { return; } fence->Wait(); fence->Unprotect(this); - fence = nullptr; } void VKFenceWatch::Watch(VKFence& new_fence) { diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index 5018dfa44..5bfe4cead 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h @@ -63,7 +63,7 @@ public: void Protect(VKResource* resource); /// Removes protection for a resource. - void Unprotect(const VKResource* resource); + void Unprotect(VKResource* resource); /// Retreives the fence. operator vk::Fence() const { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..58ffa42f2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,90 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <memory> +#include <optional> +#include <vector> + +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) + : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ + pipeline_stage} { + CreateBuffers(memory_manager, usage); + ReserveWatches(WATCHES_INITIAL_RESERVE); +} + +VKStreamBuffer::~VKStreamBuffer() = default; + +std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { + ASSERT(size <= buffer_size); + mapped_size = size; + + if (offset + size > buffer_size) { + // The buffer would overflow, save the amount of used buffers, signal an invalidation and + // reset the state. + invalidation_mark = used_watches; + used_watches = 0; + offset = 0; + } + + return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; +} + +VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); + + if (invalidation_mark) { + // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. + exctx = scheduler.Flush(); + std::for_each(watches.begin(), watches.begin() + *invalidation_mark, + [&](auto& resource) { resource->Wait(); }); + invalidation_mark = std::nullopt; + } + + if (used_watches + 1 >= watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(WATCHES_RESERVE_CHUNK); + } + // Add a watch for this allocation. + watches[used_watches++]->Watch(exctx.GetFence()); + + offset += size; + + return exctx; +} + +void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, + nullptr); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + commit = memory_manager.Commit(*buffer, true); + mapped_pointer = commit->GetData(); +} + +void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { + const std::size_t previous_size = watches.size(); + watches.resize(previous_size + grow_size); + std::generate(watches.begin() + previous_size, watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..69d036ccd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,72 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <optional> +#include <tuple> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" + +namespace Vulkan { + +class VKDevice; +class VKFence; +class VKFenceWatch; +class VKResourceManager; +class VKScheduler; + +class VKStreamBuffer { +public: + explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, + vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + ~VKStreamBuffer(); + + /** + * Reserves a region of memory from the stream buffer. + * @param size Size to reserve. + * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer + * offset and a boolean that's true when buffer has been invalidated. + */ + std::tuple<u8*, u64, bool> Reserve(u64 size); + + /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. + [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); + + vk::Buffer GetBuffer() const { + return *buffer; + } + +private: + /// Creates Vulkan buffer handles committing the required the required memory. + void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + + /// Increases the amount of watches available. + void ReserveWatches(std::size_t grow_size); + + const VKDevice& device; ///< Vulkan device manager. + VKScheduler& scheduler; ///< Command scheduler. + const u64 buffer_size; ///< Total size of the stream buffer. + const vk::AccessFlags access; ///< Access usage of this stream buffer. + const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. + + UniqueBuffer buffer; ///< Mapped buffer. + VKMemoryCommit commit; ///< Memory commit. + u8* mapped_pointer{}; ///< Pointer to the host visible commit + + u64 offset{}; ///< Buffer iterator. + u64 mapped_size{}; ///< Size reserved for the current copy. + + std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches + std::size_t used_watches{}; ///< Count of watches, reset on invalidation. + std::optional<std::size_t> + invalidation_mark{}; ///< Number of watches used in the current invalidation. +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 55ec601ff..38f01ca50 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); - Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, + Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, Tegra::Shader::IpaSampleMode::Default}; u64 next_element = instr.attribute.fmt20.element; diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index f9502e3d0..d750a2936 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { instr.ipa.sample_mode.Value()}; const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); + Node value = attr; + const Tegra::Shader::Attribute::Index index = attribute.index.Value(); + if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && + index <= Tegra::Shader::Attribute::Index::Attribute_31) { + // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. + // In theory by setting them as perspective, OpenGL does the perspective correction. + // A way must figured to reverse the last step of it. + if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + } + } + value = GetSaturatedFloat(value, instr.ipa.saturate); SetRegister(bb, instr.gpr0, value); break; @@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index be4635342..33b071747 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, return {node, cursor}; } if (const auto conditional = std::get_if<ConditionalNode>(node)) { - const auto& code = conditional->GetCode(); - const auto [found, internal_cursor] = - FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); + const auto& conditional_code = conditional->GetCode(); + const auto [found, internal_cursor] = FindOperation( + conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); if (found) return {found, cursor}; } @@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { - const auto& code = conditional->GetCode(); - return TrackCbuf(tracked, code, static_cast<s64>(code.size())); + const auto& conditional_code = conditional->GetCode(); + return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); } return nullptr; } diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index 5f0896f84..c09a06520 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp @@ -61,7 +61,7 @@ void CompatDB::Submit() { button(QWizard::CancelButton)->setVisible(false); testcase_watcher.setFuture(QtConcurrent::run( - [this]() { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); + [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); break; default: LOG_ERROR(Frontend, "Unexpected page: {}", currentId()); diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 209798521..71683da8e 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() { for (unsigned int y = 0; y < surface_height; ++y) { for (unsigned int x = 0; x < surface_width; ++x) { - Math::Vec4<u8> color; + Common::Vec4<u8> color; color[0] = texture_data[x + y * surface_width + 0]; color[1] = texture_data[x + y * surface_width + 1]; color[2] = texture_data[x + y * surface_width + 2]; |