diff options
Diffstat (limited to 'src')
27 files changed, 1010 insertions, 149 deletions
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 798ff6e84..fc02a3ceb 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -33,17 +33,28 @@ pad_cleft = pad_cright = [Core] -gpu_refresh_rate = ## 30 (default) -frame_skip = ## 0: No frameskip (default), 1 : 2x frameskip, 2 : 4x frameskip, etc. +# The refresh rate for the GPU +# Defaults to 30 +gpu_refresh_rate = + +# The applied frameskip amount. Must be a power of two. +# 0 (default): No frameskip, 1: x2 frameskip, 2: x4 frameskip, 3: x8 frameskip, etc. +frame_skip = [Data Storage] +# Whether to create a virtual SD card. +# 1 (default): Yes, 0: No use_virtual_sd = [System Region] -region_value = ## 0 : Japan, 1 : Usa (default), 2 : Europe, 3 : Australia, 4 : China, 5 : Korea, 6 : Taiwan. +# The system region that Citra will use during emulation +# 0: Japan, 1: USA (default), 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan +region_value = [Miscellaneous] -log_filter = *:Info ## Examples: *:Debug Kernel.SVC:Trace Service.*:Critical +# A filter which removes logs below a certain logging level. +# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical +log_filter = *:Info )"; } diff --git a/src/citra/emu_window/emu_window_glfw.cpp b/src/citra/emu_window/emu_window_glfw.cpp index ec3e8cf34..81231e1e5 100644 --- a/src/citra/emu_window/emu_window_glfw.cpp +++ b/src/citra/emu_window/emu_window_glfw.cpp @@ -36,20 +36,13 @@ const bool EmuWindow_GLFW::IsOpen() { } void EmuWindow_GLFW::OnFramebufferResizeEvent(GLFWwindow* win, int width, int height) { - ASSERT(width > 0); - ASSERT(height > 0); - - GetEmuWindow(win)->NotifyFramebufferSizeChanged(std::pair<unsigned,unsigned>(width, height)); + GetEmuWindow(win)->NotifyFramebufferLayoutChanged(EmuWindow::FramebufferLayout::DefaultScreenLayout(width, height)); } void EmuWindow_GLFW::OnClientAreaResizeEvent(GLFWwindow* win, int width, int height) { - ASSERT(width > 0); - ASSERT(height > 0); - // NOTE: GLFW provides no proper way to set a minimal window size. // Hence, we just ignore the corresponding EmuWindow hint. - - GetEmuWindow(win)->NotifyClientAreaSizeChanged(std::pair<unsigned,unsigned>(width, height)); + OnFramebufferResizeEvent(win, width, height); } /// EmuWindow_GLFW constructor diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index 586bc84b0..ff780cad4 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -13,6 +13,7 @@ set(SRCS debugger/graphics_cmdlists.cpp debugger/graphics_framebuffer.cpp debugger/graphics_vertex_shader.cpp + debugger/profiler.cpp debugger/ramview.cpp debugger/registers.cpp util/spinbox.cpp @@ -35,6 +36,7 @@ set(HEADERS debugger/graphics_cmdlists.h debugger/graphics_framebuffer.h debugger/graphics_vertex_shader.h + debugger/profiler.h debugger/ramview.h debugger/registers.h util/spinbox.h @@ -48,6 +50,7 @@ set(UIS config/controller_config.ui debugger/callstack.ui debugger/disassembler.ui + debugger/profiler.ui debugger/registers.ui hotkeys.ui main.ui @@ -61,7 +64,11 @@ else() qt4_wrap_ui(UI_HDRS ${UIS}) endif() -add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) +if (APPLE) + add_executable(citra-qt MACOSX_BUNDLE ${SRCS} ${HEADERS} ${UI_HDRS}) +else() + add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) +endif() target_link_libraries(citra-qt core common video_core qhexedit) target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 6514288a0..a040e75c1 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -155,6 +155,7 @@ GRenderWindow::GRenderWindow(QWidget* parent) : QWidget(parent), emu_thread(this child = new GGLWidgetInternal(fmt, this); QBoxLayout* layout = new QHBoxLayout(this); + resize(VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight); layout->addWidget(child); layout->setMargin(0); @@ -234,7 +235,7 @@ void GRenderWindow::OnFramebufferSizeChanged() unsigned height = child->QPaintDevice::height(); #endif - NotifyFramebufferSizeChanged(std::make_pair(width, height)); + NotifyFramebufferLayoutChanged(EmuWindow::FramebufferLayout::DefaultScreenLayout(width, height)); } void GRenderWindow::BackupGeometry() diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 06eaf0bf0..3b072d015 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -12,6 +12,7 @@ #include "graphics_vertex_shader.h" +using nihstro::OpCode; using nihstro::Instruction; using nihstro::SourceRegister; using nihstro::SwizzlePattern; @@ -78,7 +79,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; // longest known instruction name: "setemit " - output << std::setw(8) << std::left << instr.opcode.GetInfo().name; + output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; // e.g. "-c92.xyzw" static auto print_input = [](std::stringstream& output, const SourceRegister& input, @@ -109,16 +110,16 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con print_input_indexed(output, input, negate, swizzle_mask, address_register_name); }; - switch (instr.opcode.GetInfo().type) { - case Instruction::OpCodeType::Trivial: + switch (instr.opcode.Value().GetInfo().type) { + case OpCode::Type::Trivial: // Nothing to do here break; - case Instruction::OpCodeType::Arithmetic: + case OpCode::Type::Arithmetic: { // Use custom code for special instructions - switch (instr.opcode.EffectiveOpCode()) { - case Instruction::OpCode::CMP: + switch (instr.opcode.Value().EffectiveOpCode()) { + case OpCode::Id::CMP: { // NOTE: CMP always writes both cc components, so we do not consider the dest mask here. output << std::setw(4) << std::right << "cc."; @@ -142,13 +143,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con default: { - bool src_is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + bool src_is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Dest) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Dest) { // e.g. "r12.xy__" - output << std::setw(4) << std::right << instr.common.dest.GetName() + "."; + output << std::setw(4) << std::right << instr.common.dest.Value().GetName() + "."; output << swizzle.DestMaskToString(); - } else if (instr.opcode.GetInfo().subtype == Instruction::OpCodeInfo::MOVA) { + } else if (instr.opcode.Value().GetInfo().subtype == OpCode::Info::MOVA) { output << std::setw(4) << std::right << "a0."; output << swizzle.DestMaskToString(); } else { @@ -156,7 +157,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con } output << " "; - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Src1) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src1) { SourceRegister src1 = instr.common.GetSrc1(src_is_inverted); print_input_indexed(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false), instr.common.AddressRegisterName()); } else { @@ -164,7 +165,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con } // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1 - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Src2) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false)); } @@ -175,17 +176,17 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con break; } - case Instruction::OpCodeType::Conditional: + case OpCode::Type::Conditional: { - switch (instr.opcode.EffectiveOpCode()) { - case Instruction::OpCode::LOOP: + switch (instr.opcode.Value().EffectiveOpCode()) { + case OpCode::Id::LOOP: output << "(unknown instruction format)"; break; default: output << "if "; - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasCondition) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasCondition) { const char* ops[] = { " || ", " && ", "", "" }; @@ -198,22 +199,22 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con output << ((!instr.flow_control.refy) ? "!" : " ") << "cc.y"; output << " "; - } else if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasUniformIndex) { + } else if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasUniformIndex) { output << "b" << instr.flow_control.bool_uniform_id << " "; } u32 target_addr = instr.flow_control.dest_offset; u32 target_addr_else = instr.flow_control.dest_offset; - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasAlternative) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasAlternative) { output << "else jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset << " "; - } else if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasExplicitDest) { + } else if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasExplicitDest) { output << "jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset << " "; } else { // TODO: Handle other cases } - if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasFinishPoint) { + if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasFinishPoint) { output << "(return on " << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset + 4 * instr.flow_control.num_instructions << ")"; } diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp new file mode 100644 index 000000000..ae0568b6a --- /dev/null +++ b/src/citra_qt/debugger/profiler.cpp @@ -0,0 +1,138 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "profiler.h" + +#include "common/profiler_reporting.h" + +using namespace Common::Profiling; + +static QVariant GetDataForColumn(int col, const AggregatedDuration& duration) +{ + static auto duration_to_float = [](Duration dur) -> float { + using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>; + return std::chrono::duration_cast<FloatMs>(dur).count(); + }; + + switch (col) { + case 1: return duration_to_float(duration.avg); + case 2: return duration_to_float(duration.min); + case 3: return duration_to_float(duration.max); + default: return QVariant(); + } +} + +static const TimingCategoryInfo* GetCategoryInfo(int id) +{ + const auto& categories = GetProfilingManager().GetTimingCategoriesInfo(); + if (id >= categories.size()) { + return nullptr; + } else { + return &categories[id]; + } +} + +ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) +{ + updateProfilingInfo(); + const auto& categories = GetProfilingManager().GetTimingCategoriesInfo(); + results.time_per_category.resize(categories.size()); +} + +QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const +{ + if (orientation == Qt::Horizontal && role == Qt::DisplayRole) { + switch (section) { + case 0: return tr("Category"); + case 1: return tr("Avg"); + case 2: return tr("Min"); + case 3: return tr("Max"); + } + } + + return QVariant(); +} + +QModelIndex ProfilerModel::index(int row, int column, const QModelIndex& parent) const +{ + return createIndex(row, column); +} + +QModelIndex ProfilerModel::parent(const QModelIndex& child) const +{ + return QModelIndex(); +} + +int ProfilerModel::columnCount(const QModelIndex& parent) const +{ + return 4; +} + +int ProfilerModel::rowCount(const QModelIndex& parent) const +{ + if (parent.isValid()) { + return 0; + } else { + return results.time_per_category.size() + 2; + } +} + +QVariant ProfilerModel::data(const QModelIndex& index, int role) const +{ + if (role == Qt::DisplayRole) { + if (index.row() == 0) { + if (index.column() == 0) { + return tr("Frame"); + } else { + return GetDataForColumn(index.column(), results.frame_time); + } + } else if (index.row() == 1) { + if (index.column() == 0) { + return tr("Frame (with swapping)"); + } else { + return GetDataForColumn(index.column(), results.interframe_time); + } + } else { + if (index.column() == 0) { + const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2); + return info != nullptr ? QString(info->name) : QVariant(); + } else { + if (index.row() - 2 < results.time_per_category.size()) { + return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]); + } else { + return QVariant(); + } + } + } + } + + return QVariant(); +} + +void ProfilerModel::updateProfilingInfo() +{ + results = GetTimingResultsAggregator()->GetAggregatedResults(); + emit dataChanged(createIndex(0, 1), createIndex(rowCount() - 1, 3)); +} + +ProfilerWidget::ProfilerWidget(QWidget* parent) : QDockWidget(parent) +{ + ui.setupUi(this); + + model = new ProfilerModel(this); + ui.treeView->setModel(model); + + connect(this, SIGNAL(visibilityChanged(bool)), SLOT(setProfilingInfoUpdateEnabled(bool))); + connect(&update_timer, SIGNAL(timeout()), model, SLOT(updateProfilingInfo())); +} + +void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable) +{ + if (enable) { + update_timer.start(100); + model->updateProfilingInfo(); + } else { + update_timer.stop(); + } +} diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h new file mode 100644 index 000000000..a6d87aa0f --- /dev/null +++ b/src/citra_qt/debugger/profiler.h @@ -0,0 +1,50 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <QAbstractItemModel> +#include <QDockWidget> +#include <QTimer> +#include "ui_profiler.h" + +#include "common/profiler_reporting.h" + +class ProfilerModel : public QAbstractItemModel +{ + Q_OBJECT + +public: + ProfilerModel(QObject* parent); + + QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const; + QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; + QModelIndex parent(const QModelIndex& child) const override; + int columnCount(const QModelIndex& parent = QModelIndex()) const override; + int rowCount(const QModelIndex& parent = QModelIndex()) const override; + QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; + +public slots: + void updateProfilingInfo(); + +private: + Common::Profiling::AggregatedFrameResult results; +}; + +class ProfilerWidget : public QDockWidget +{ + Q_OBJECT + +public: + ProfilerWidget(QWidget* parent = 0); + +private slots: + void setProfilingInfoUpdateEnabled(bool enable); + +private: + Ui::Profiler ui; + ProfilerModel* model; + + QTimer update_timer; +}; diff --git a/src/citra_qt/debugger/profiler.ui b/src/citra_qt/debugger/profiler.ui new file mode 100644 index 000000000..d3c9a9a1f --- /dev/null +++ b/src/citra_qt/debugger/profiler.ui @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<ui version="4.0"> + <class>Profiler</class> + <widget class="QDockWidget" name="Profiler"> + <property name="geometry"> + <rect> + <x>0</x> + <y>0</y> + <width>400</width> + <height>300</height> + </rect> + </property> + <property name="windowTitle"> + <string>Profiler</string> + </property> + <widget class="QWidget" name="dockWidgetContents"> + <layout class="QVBoxLayout" name="verticalLayout"> + <item> + <widget class="QTreeView" name="treeView"> + <property name="alternatingRowColors"> + <bool>true</bool> + </property> + <property name="uniformRowHeights"> + <bool>true</bool> + </property> + </widget> + </item> + </layout> + </widget> + </widget> + <resources/> + <connections/> +</ui> diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index df7699921..e5ca04124 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -35,6 +35,7 @@ #include "debugger/graphics_cmdlists.h" #include "debugger/graphics_framebuffer.h" #include "debugger/graphics_vertex_shader.h" +#include "debugger/profiler.h" #include "core/settings.h" #include "core/system.h" @@ -57,6 +58,10 @@ GMainWindow::GMainWindow() render_window = new GRenderWindow; render_window->hide(); + profilerWidget = new ProfilerWidget(this); + addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); + profilerWidget->hide(); + disasmWidget = new DisassemblerWidget(this, render_window->GetEmuThread()); addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); disasmWidget->hide(); @@ -90,6 +95,7 @@ GMainWindow::GMainWindow() graphicsVertexShaderWidget->hide(); QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); + debug_menu->addAction(profilerWidget->toggleViewAction()); debug_menu->addAction(disasmWidget->toggleViewAction()); debug_menu->addAction(registersWidget->toggleViewAction()); debug_menu->addAction(callstackWidget->toggleViewAction()); diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index dd53489dd..9b57c5772 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h @@ -11,6 +11,7 @@ class GImageInfo; class GRenderWindow; +class ProfilerWidget; class DisassemblerWidget; class RegistersWidget; class CallstackWidget; @@ -54,6 +55,7 @@ private: GRenderWindow* render_window; + ProfilerWidget* profilerWidget; DisassemblerWidget* disasmWidget; RegistersWidget* registersWidget; CallstackWidget* callstackWidget; diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index b05c35546..daa2d59de 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -14,6 +14,7 @@ set(SRCS mem_arena.cpp memory_util.cpp misc.cpp + profiler.cpp scm_rev.cpp string_util.cpp symbols.cpp @@ -48,11 +49,14 @@ set(HEADERS mem_arena.h memory_util.h platform.h + profiler.h + profiler_reporting.h scm_rev.h scope_exit.h string_util.h swap.h symbols.h + synchronized_wrapper.h thread.h thread_queue_list.h thunk.h diff --git a/src/common/assert.h b/src/common/assert.h index 3b2232a7e..9ca7adb15 100644 --- a/src/common/assert.h +++ b/src/common/assert.h @@ -4,24 +4,43 @@ #pragma once +#include <cstdlib> + #include "common/common_funcs.h" +// For asserts we'd like to keep all the junk executed when an assert happens away from the +// important code in the function. One way of doing this is to put all the relevant code inside a +// lambda and force the compiler to not inline it. Unfortunately, MSVC seems to have no syntax to +// specify __declspec on lambda functions, so what we do instead is define a noinline wrapper +// template that calls the lambda. This seems to generate an extra instruction at the call-site +// compared to the ideal implementation (which wouldn't support ASSERT_MSG parameters), but is good +// enough for our purposes. +template <typename Fn> +#if defined(_MSC_VER) + __declspec(noinline, noreturn) +#elif defined(__GNUC__) + __attribute__((noinline, noreturn, cold)) +#endif +static void assert_noinline_call(const Fn& fn) { + fn(); + Crash(); + exit(1); // Keeps GCC's mouth shut about this actually returning +} + // TODO (yuriks) allow synchronous logging so we don't need printf #define ASSERT(_a_) \ - do if (!(_a_)) {\ + do if (!(_a_)) { assert_noinline_call([] { \ fprintf(stderr, "Assertion Failed!\n\n Line: %d\n File: %s\n Time: %s\n", \ __LINE__, __FILE__, __TIME__); \ - Crash(); \ - } while (0) + }); } while (0) #define ASSERT_MSG(_a_, ...) \ - do if (!(_a_)) {\ + do if (!(_a_)) { assert_noinline_call([&] { \ fprintf(stderr, "Assertion Failed!\n\n Line: %d\n File: %s\n Time: %s\n", \ __LINE__, __FILE__, __TIME__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ - Crash(); \ - } while (0) + }); } while (0) #define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") diff --git a/src/common/emu_window.cpp b/src/common/emu_window.cpp index 48bb35db5..6459d2f32 100644 --- a/src/common/emu_window.cpp +++ b/src/common/emu_window.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "emu_window.h" +#include "video_core/video_core.h" void EmuWindow::KeyPressed(KeyMap::HostDeviceKey key) { Service::HID::PadState mapped_key = KeyMap::GetPadKey(key); @@ -15,3 +16,52 @@ void EmuWindow::KeyReleased(KeyMap::HostDeviceKey key) { Service::HID::PadButtonRelease(mapped_key); } + +EmuWindow::FramebufferLayout EmuWindow::FramebufferLayout::DefaultScreenLayout(unsigned width, unsigned height) { + ASSERT(width > 0); + ASSERT(height > 0); + + EmuWindow::FramebufferLayout res = { width, height, {}, {} }; + + float window_aspect_ratio = static_cast<float>(height) / width; + float emulation_aspect_ratio = static_cast<float>(VideoCore::kScreenTopHeight * 2) / + VideoCore::kScreenTopWidth; + + if (window_aspect_ratio > emulation_aspect_ratio) { + // Window is narrower than the emulation content => apply borders to the top and bottom + int viewport_height = static_cast<int>(std::round(emulation_aspect_ratio * width)); + + res.top_screen.left = 0; + res.top_screen.right = res.top_screen.left + width; + res.top_screen.top = (height - viewport_height) / 2; + res.top_screen.bottom = res.top_screen.top + viewport_height / 2; + + int bottom_width = static_cast<int>((static_cast<float>(VideoCore::kScreenBottomWidth) / + VideoCore::kScreenTopWidth) * (res.top_screen.right - res.top_screen.left)); + int bottom_border = ((res.top_screen.right - res.top_screen.left) - bottom_width) / 2; + + res.bottom_screen.left = bottom_border; + res.bottom_screen.right = res.bottom_screen.left + bottom_width; + res.bottom_screen.top = res.top_screen.bottom; + res.bottom_screen.bottom = res.bottom_screen.top + viewport_height / 2; + } else { + // Otherwise, apply borders to the left and right sides of the window. + int viewport_width = static_cast<int>(std::round(height / emulation_aspect_ratio)); + + res.top_screen.left = (width - viewport_width) / 2; + res.top_screen.right = res.top_screen.left + viewport_width; + res.top_screen.top = 0; + res.top_screen.bottom = res.top_screen.top + height / 2; + + int bottom_width = static_cast<int>((static_cast<float>(VideoCore::kScreenBottomWidth) / + VideoCore::kScreenTopWidth) * (res.top_screen.right - res.top_screen.left)); + int bottom_border = ((res.top_screen.right - res.top_screen.left) - bottom_width) / 2; + + res.bottom_screen.left = res.top_screen.left + bottom_border; + res.bottom_screen.right = res.bottom_screen.left + bottom_width; + res.bottom_screen.top = res.top_screen.bottom; + res.bottom_screen.bottom = res.bottom_screen.top + height / 2; + } + + return res; +} diff --git a/src/common/emu_window.h b/src/common/emu_window.h index 1ad4b82a3..f6099fdb6 100644 --- a/src/common/emu_window.h +++ b/src/common/emu_window.h @@ -8,6 +8,7 @@ #include "common/scm_rev.h" #include "common/string_util.h" #include "common/key_map.h" +#include "common/math_util.h" /** * Abstraction class used to provide an interface between emulation code and the frontend @@ -38,6 +39,23 @@ public: std::pair<unsigned,unsigned> min_client_area_size; }; + /// Describes the layout of the window framebuffer (size and top/bottom screen positions) + struct FramebufferLayout { + + /** + * Factory method for constructing a default FramebufferLayout + * @param width Window framebuffer width in pixels + * @param height Window framebuffer height in pixels + * @return Newly created FramebufferLayout object with default screen regions initialized + */ + static FramebufferLayout DefaultScreenLayout(unsigned width, unsigned height); + + unsigned width; + unsigned height; + MathUtil::Rectangle<unsigned> top_screen; + MathUtil::Rectangle<unsigned> bottom_screen; + }; + /// Swap buffers to display the next frame virtual void SwapBuffers() = 0; @@ -75,11 +93,11 @@ public: } /** - * Gets the framebuffer size in pixels. + * Gets the framebuffer layout (width, height, and screen regions) * @note This method is thread-safe */ - const std::pair<unsigned,unsigned> GetFramebufferSize() const { - return framebuffer_size; + const FramebufferLayout& GetFramebufferLayout() const { + return framebuffer_layout; } /** @@ -118,11 +136,11 @@ protected: } /** - * Update internal framebuffer size with the given parameter. + * Update framebuffer layout with the given parameter. * @note EmuWindow implementations will usually use this in window resize event handlers. */ - void NotifyFramebufferSizeChanged(const std::pair<unsigned,unsigned>& size) { - framebuffer_size = size; + void NotifyFramebufferLayoutChanged(const FramebufferLayout& layout) { + framebuffer_layout = layout; } /** @@ -143,7 +161,7 @@ private: // By default, ignore this request and do nothing. } - std::pair<unsigned,unsigned> framebuffer_size; + FramebufferLayout framebuffer_layout; ///< Current framebuffer layout unsigned client_area_width; ///< Current client width, should be set by window impl. unsigned client_area_height; ///< Current client height, should be set by window impl. diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp new file mode 100644 index 000000000..65c3df167 --- /dev/null +++ b/src/common/profiler.cpp @@ -0,0 +1,182 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/profiler.h" +#include "common/profiler_reporting.h" +#include "common/assert.h" + +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013. +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include <Windows.h> // For QueryPerformanceCounter/Frequency +#endif + +namespace Common { +namespace Profiling { + +#if ENABLE_PROFILING +thread_local Timer* Timer::current_timer = nullptr; +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 +QPCClock::time_point QPCClock::now() { + static LARGE_INTEGER freq; + // Use this dummy local static to ensure this gets initialized once. + static BOOL dummy = QueryPerformanceFrequency(&freq); + + LARGE_INTEGER ticks; + QueryPerformanceCounter(&ticks); + + // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The + // correct way to approach this would be to just return ticks as a time_point and then subtract + // and do this conversion when creating a duration from two time_points, however, as far as I + // could tell the C++ requirements for these types are incompatible with this approach. + return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart)); +} +#endif + +TimingCategory::TimingCategory(const char* name, TimingCategory* parent) + : accumulated_duration(0) { + + ProfilingManager& manager = GetProfilingManager(); + category_id = manager.RegisterTimingCategory(this, name); + if (parent != nullptr) + manager.SetTimingCategoryParent(category_id, parent->category_id); +} + +ProfilingManager::ProfilingManager() + : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { +} + +unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) { + TimingCategoryInfo info; + info.category = category; + info.name = name; + info.parent = TimingCategoryInfo::NO_PARENT; + + unsigned int id = (unsigned int)timing_categories.size(); + timing_categories.push_back(std::move(info)); + + return id; +} + +void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) { + ASSERT(category < timing_categories.size()); + ASSERT(parent < timing_categories.size()); + + timing_categories[category].parent = parent; +} + +void ProfilingManager::BeginFrame() { + this_frame_start = Clock::now(); +} + +void ProfilingManager::FinishFrame() { + Clock::time_point now = Clock::now(); + + results.interframe_time = now - last_frame_end; + results.frame_time = now - this_frame_start; + + results.time_per_category.resize(timing_categories.size()); + for (size_t i = 0; i < timing_categories.size(); ++i) { + results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime(); + } + + last_frame_end = now; +} + +TimingResultsAggregator::TimingResultsAggregator(size_t window_size) + : max_window_size(window_size), window_size(0) { + interframe_times.resize(window_size, Duration::zero()); + frame_times.resize(window_size, Duration::zero()); +} + +void TimingResultsAggregator::Clear() { + window_size = cursor = 0; +} + +void TimingResultsAggregator::SetNumberOfCategories(size_t n) { + size_t old_size = times_per_category.size(); + if (n == old_size) + return; + + times_per_category.resize(n); + + for (size_t i = old_size; i < n; ++i) { + times_per_category[i].resize(max_window_size, Duration::zero()); + } +} + +void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { + SetNumberOfCategories(frame_result.time_per_category.size()); + + interframe_times[cursor] = frame_result.interframe_time; + frame_times[cursor] = frame_result.frame_time; + for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) { + times_per_category[i][cursor] = frame_result.time_per_category[i]; + } + + ++cursor; + if (cursor == max_window_size) + cursor = 0; + if (window_size < max_window_size) + ++window_size; +} + +static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) { + AggregatedDuration result; + result.avg = Duration::zero(); + + result.min = result.max = (len == 0 ? Duration::zero() : v[0]); + + for (size_t i = 1; i < len; ++i) { + Duration value = v[i]; + result.avg += value; + result.min = std::min(result.min, value); + result.max = std::max(result.max, value); + } + if (len != 0) + result.avg /= len; + + return result; +} + +static float tof(Common::Profiling::Duration dur) { + using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>; + return std::chrono::duration_cast<FloatMs>(dur).count(); +} + +AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const { + AggregatedFrameResult result; + + result.interframe_time = AggregateField(interframe_times, window_size); + result.frame_time = AggregateField(frame_times, window_size); + + if (result.interframe_time.avg != Duration::zero()) { + result.fps = 1000.0f / tof(result.interframe_time.avg); + } else { + result.fps = 0.0f; + } + + result.time_per_category.resize(times_per_category.size()); + for (size_t i = 0; i < times_per_category.size(); ++i) { + result.time_per_category[i] = AggregateField(times_per_category[i], window_size); + } + + return result; +} + +ProfilingManager& GetProfilingManager() { + // Takes advantage of "magic" static initialization for race-free initialization. + static ProfilingManager manager; + return manager; +} + +SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() { + static SynchronizedWrapper<TimingResultsAggregator> aggregator(30); + return SynchronizedRef<TimingResultsAggregator>(aggregator); +} + +} // namespace Profiling +} // namespace Common diff --git a/src/common/profiler.h b/src/common/profiler.h new file mode 100644 index 000000000..3e967b4bc --- /dev/null +++ b/src/common/profiler.h @@ -0,0 +1,152 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <chrono> + +#include "common/assert.h" +#include "common/thread.h" + +namespace Common { +namespace Profiling { + +// If this is defined to 0, it turns all Timers into no-ops. +#ifndef ENABLE_PROFILING +#define ENABLE_PROFILING 1 +#endif + +#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013 +// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad +// precision. We manually implement a clock based on QPC to get good results. + +struct QPCClock { + using duration = std::chrono::microseconds; + using time_point = std::chrono::time_point<QPCClock>; + using rep = duration::rep; + using period = duration::period; + static const bool is_steady = false; + + static time_point now(); +}; + +using Clock = QPCClock; +#else +using Clock = std::chrono::high_resolution_clock; +#endif + +using Duration = Clock::duration; + +/** + * Represents a timing category that measured time can be accounted towards. Should be declared as a + * global variable and passed to Timers. + */ +class TimingCategory final { +public: + TimingCategory(const char* name, TimingCategory* parent = nullptr); + + unsigned int GetCategoryId() const { + return category_id; + } + + /// Adds some time to this category. Can safely be called from multiple threads at the same time. + void AddTime(Duration amount) { + std::atomic_fetch_add_explicit( + &accumulated_duration, amount.count(), + std::memory_order_relaxed); + } + + /** + * Atomically retrieves the accumulated measured time for this category and resets the counter + * to zero. Can be safely called concurrently with AddTime. + */ + Duration GetAccumulatedTime() { + return Duration(std::atomic_exchange_explicit( + &accumulated_duration, (Duration::rep)0, + std::memory_order_relaxed)); + } + +private: + unsigned int category_id; + std::atomic<Duration::rep> accumulated_duration; +}; + +/** + * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given + * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be + * appropriately paired. + * + * When a Timer is started, it automatically pauses a previously running timer on the same thread, + * which is resumed when it is stopped. As such, no special action needs to be taken to avoid + * double-accounting of time on two categories. + */ +class Timer { +public: + Timer(TimingCategory& category) : category(category) { + } + + void Start() { +#if ENABLE_PROFILING + ASSERT(!running); + previous_timer = current_timer; + current_timer = this; + if (previous_timer != nullptr) + previous_timer->StopTiming(); + + StartTiming(); +#endif + } + + void Stop() { +#if ENABLE_PROFILING + ASSERT(running); + StopTiming(); + + if (previous_timer != nullptr) + previous_timer->StartTiming(); + current_timer = previous_timer; +#endif + } + +private: +#if ENABLE_PROFILING + void StartTiming() { + start = Clock::now(); + running = true; + } + + void StopTiming() { + auto duration = Clock::now() - start; + running = false; + category.AddTime(std::chrono::duration_cast<Duration>(duration)); + } + + Clock::time_point start; + bool running = false; + + Timer* previous_timer; + static thread_local Timer* current_timer; +#endif + + TimingCategory& category; +}; + +/** + * A Timer that automatically starts timing when created and stops at the end of the scope. Should + * be used in the majority of cases. + */ +class ScopeTimer : public Timer { +public: + ScopeTimer(TimingCategory& category) : Timer(category) { + Start(); + } + + ~ScopeTimer() { + Stop(); + } +}; + +} // namespace Profiling +} // namespace Common diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h new file mode 100644 index 000000000..3abb73315 --- /dev/null +++ b/src/common/profiler_reporting.h @@ -0,0 +1,108 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <chrono> +#include <mutex> +#include <utility> +#include <vector> + +#include "common/profiler.h" +#include "common/synchronized_wrapper.h" + +namespace Common { +namespace Profiling { + +struct TimingCategoryInfo { + static const unsigned int NO_PARENT = -1; + + TimingCategory* category; + const char* name; + unsigned int parent; +}; + +struct ProfilingFrameResult { + /// Time since the last delivered frame + Duration interframe_time; + + /// Time spent processing a frame, excluding VSync + Duration frame_time; + + /// Total amount of time spent inside each category in this frame. Indexed by the category id + std::vector<Duration> time_per_category; +}; + +class ProfilingManager final { +public: + ProfilingManager(); + + unsigned int RegisterTimingCategory(TimingCategory* category, const char* name); + void SetTimingCategoryParent(unsigned int category, unsigned int parent); + + const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const { + return timing_categories; + } + + /// This should be called after swapping screen buffers. + void BeginFrame(); + /// This should be called before swapping screen buffers. + void FinishFrame(); + + /// Get the timing results from the previous frame. This is updated when you call FinishFrame(). + const ProfilingFrameResult& GetPreviousFrameResults() const { + return results; + } + +private: + std::vector<TimingCategoryInfo> timing_categories; + Clock::time_point last_frame_end; + Clock::time_point this_frame_start; + + ProfilingFrameResult results; +}; + +struct AggregatedDuration { + Duration avg, min, max; +}; + +struct AggregatedFrameResult { + /// Time since the last delivered frame + AggregatedDuration interframe_time; + + /// Time spent processing a frame, excluding VSync + AggregatedDuration frame_time; + + float fps; + + /// Total amount of time spent inside each category in this frame. Indexed by the category id + std::vector<AggregatedDuration> time_per_category; +}; + +class TimingResultsAggregator final { +public: + TimingResultsAggregator(size_t window_size); + + void Clear(); + void SetNumberOfCategories(size_t n); + + void AddFrame(const ProfilingFrameResult& frame_result); + + AggregatedFrameResult GetAggregatedResults() const; + + size_t max_window_size; + size_t window_size; + size_t cursor; + + std::vector<Duration> interframe_times; + std::vector<Duration> frame_times; + std::vector<std::vector<Duration>> times_per_category; +}; + +ProfilingManager& GetProfilingManager(); +SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator(); + +} // namespace Profiling +} // namespace Common diff --git a/src/common/synchronized_wrapper.h b/src/common/synchronized_wrapper.h new file mode 100644 index 000000000..946252b8c --- /dev/null +++ b/src/common/synchronized_wrapper.h @@ -0,0 +1,69 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <mutex> + +namespace Common { + +/** + * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no + * one forgets to lock a mutex before acessing an object. To access the wrapped object construct a + * SyncronizedRef on this wrapper. Inspired by Rust's Mutex type (http://doc.rust-lang.org/std/sync/struct.Mutex.html). + */ +template <typename T> +class SynchronizedWrapper { +public: + template <typename... Args> + SynchronizedWrapper(Args&&... args) : + data(std::forward<Args>(args)...) { + } + +private: + template <typename U> + friend class SynchronizedRef; + + std::mutex mutex; + T data; +}; + +/** + * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This + * greatly reduces the chance that someone will access the wrapped resource without locking the + * mutex. + */ +template <typename T> +class SynchronizedRef { +public: + SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) { + wrapper.mutex.lock(); + } + + SynchronizedRef(SynchronizedRef&) = delete; + SynchronizedRef(SynchronizedRef&& o) : wrapper(o.wrapper) { + o.wrapper = nullptr; + } + + ~SynchronizedRef() { + if (wrapper) + wrapper->mutex.unlock(); + } + + SynchronizedRef& operator=(SynchronizedRef&) = delete; + SynchronizedRef& operator=(SynchronizedRef&& o) { + std::swap(wrapper, o.wrapper); + } + + T& operator*() { return wrapper->data; } + const T& operator*() const { return wrapper->data; } + + T* operator->() { return &wrapper->data; } + const T* operator->() const { return &wrapper->data; } + +private: + SynchronizedWrapper<T>* wrapper; +}; + +} // namespace Common diff --git a/src/common/thread.h b/src/common/thread.h index eaf1ba00c..a45728e1e 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -24,6 +24,25 @@ #include <unistd.h> #endif +// Support for C++11's thread_local keyword was surprisingly spotty in compilers until very +// recently. Fortunately, thread local variables have been well supported for compilers for a while, +// but with semantics supporting only POD types, so we can use a few defines to get some amount of +// backwards compat support. +// WARNING: This only works correctly with POD types. +#if defined(__clang__) +# if !__has_feature(cxx_thread_local) +# define thread_local __thread +# endif +#elif defined(__GNUC__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +# define thread_local __thread +# endif +#elif defined(_MSC_VER) +# if _MSC_VER < 1900 +# define thread_local __declspec(thread) +# endif +#endif + namespace Common { diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index d8a708b9e..2f72f5077 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -9,6 +9,7 @@ #include <unordered_map> #include "common/logging/log.h" +#include "common/profiler.h" #include "core/mem_map.h" #include "core/hle/hle.h" @@ -20,6 +21,9 @@ #include "core/arm/skyeye_common/armmmu.h" #include "core/arm/skyeye_common/vfp/vfp.h" +Common::Profiling::TimingCategory profile_execute("DynCom::Execute"); +Common::Profiling::TimingCategory profile_decode("DynCom::Decode"); + enum { COND = (1 << 0), NON_BRANCH = (1 << 1), @@ -3569,6 +3573,8 @@ typedef struct instruction_set_encoding_item ISEITEM; extern const ISEITEM arm_instruction[]; static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, addr_t addr) { + Common::Profiling::ScopeTimer timer_decode(profile_decode); + // Decode instruction, get index // Allocate memory and init InsCream // Go on next, until terminal instruction @@ -3641,6 +3647,8 @@ static bool InAPrivilegedMode(ARMul_State* core) { } unsigned InterpreterMainLoop(ARMul_State* state) { + Common::Profiling::ScopeTimer timer_execute(profile_execute); + #undef RM #undef RS @@ -4354,6 +4362,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) { cpu->Reg[14] = Memory::Read32(addr); else cpu->Reg_usr[1] = Memory::Read32(addr); + + addr += 4; } } else if (!BIT(inst, 22)) { for(int i = 0; i < 16; i++ ){ @@ -5966,54 +5976,51 @@ unsigned InterpreterMainLoop(ARMul_State* state) { ldst_inst* inst_cream = (ldst_inst*)inst_base->component; unsigned int inst = inst_cream->inst; - int i; unsigned int Rn = BITS(inst, 16, 19); unsigned int old_RN = cpu->Reg[Rn]; inst_cream->get_addr(cpu, inst_cream->inst, addr, 0); if (BIT(inst_cream->inst, 22) == 1) { - for (i = 0; i < 13; i++) { - if(BIT(inst_cream->inst, i)) { + for (int i = 0; i < 13; i++) { + if (BIT(inst_cream->inst, i)) { Memory::Write32(addr, cpu->Reg[i]); addr += 4; } } if (BIT(inst_cream->inst, 13)) { - if (cpu->Mode == USER32MODE) { - Memory::Write32(addr, cpu->Reg[i]); - addr += 4; - } else { + if (cpu->Mode == USER32MODE) + Memory::Write32(addr, cpu->Reg[13]); + else Memory::Write32(addr, cpu->Reg_usr[0]); - addr += 4; - } + + addr += 4; } if (BIT(inst_cream->inst, 14)) { - if (cpu->Mode == USER32MODE) { - Memory::Write32(addr, cpu->Reg[i]); - addr += 4; - } else { + if (cpu->Mode == USER32MODE) + Memory::Write32(addr, cpu->Reg[14]); + else Memory::Write32(addr, cpu->Reg_usr[1]); - addr += 4; - } + + addr += 4; } if (BIT(inst_cream->inst, 15)) { Memory::Write32(addr, cpu->Reg_usr[1] + 8); } } else { - for( i = 0; i < 15; i++ ) { - if(BIT(inst_cream->inst, i)) { - if(i == Rn) + for (int i = 0; i < 15; i++) { + if (BIT(inst_cream->inst, i)) { + if (i == Rn) Memory::Write32(addr, old_RN); else Memory::Write32(addr, cpu->Reg[i]); + addr += 4; } } // Check PC reg - if(BIT(inst_cream->inst, i)) { + if (BIT(inst_cream->inst, 15)) Memory::Write32(addr, cpu->Reg_usr[1] + 8); - } } } cpu->Reg[15] += GET_INST_SIZE(cpu); diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index c6764a529..1aaeaa9c9 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -4,6 +4,8 @@ #include <vector> +#include "common/profiler.h" + #include "core/arm/arm_interface.h" #include "core/mem_map.h" #include "core/hle/hle.h" @@ -16,6 +18,8 @@ namespace HLE { +Common::Profiling::TimingCategory profiler_svc("SVC Calls"); + static std::vector<ModuleDef> g_module_db; bool g_reschedule = false; ///< If true, immediately reschedules the CPU to a new thread @@ -30,6 +34,8 @@ static const FunctionDef* GetSVCInfo(u32 opcode) { } void CallSVC(u32 opcode) { + Common::Profiling::ScopeTimer timer_svc(profiler_svc); + const FunctionDef *info = GetSVCInfo(opcode); if (!info) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 586ad62b6..e031871e8 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -4,6 +4,8 @@ #include <boost/range/algorithm/fill.hpp> +#include "common/profiler.h" + #include "clipper.h" #include "command_processor.h" #include "math.h" @@ -25,6 +27,8 @@ static int float_regs_counter = 0; static u32 uniform_write_buffer[4]; +Common::Profiling::TimingCategory category_drawing("Drawing"); + static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (id >= registers.NumIds()) @@ -53,6 +57,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw): case PICA_REG_INDEX(trigger_draw_indexed): { + Common::Profiling::ScopeTimer scope_timer(category_drawing); + DebugUtils::DumpTevStageConfig(registers.GetTevStages()); if (g_debug_context) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index a27d3828c..745c4f4ed 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -505,7 +505,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture } // Add modifier - unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value(); + unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value(); static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{ { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 272695174..95ab96340 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -4,7 +4,10 @@ #include "core/hw/gpu.h" #include "core/mem_map.h" + #include "common/emu_window.h" +#include "common/profiler_reporting.h" + #include "video_core/video_core.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -75,9 +78,18 @@ void RendererOpenGL::SwapBuffers() { DrawScreens(); + auto& profiler = Common::Profiling::GetProfilingManager(); + profiler.FinishFrame(); + { + auto aggregator = Common::Profiling::GetTimingResultsAggregator(); + aggregator->AddFrame(profiler.GetPreviousFrameResults()); + } + // Swap buffers render_window->PollEvents(); render_window->SwapBuffers(); + + profiler.BeginFrame(); } /** @@ -242,28 +254,26 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x * Draws the emulated screens to the emulator window. */ void RendererOpenGL::DrawScreens() { - auto viewport_extent = GetViewportExtent(); - glViewport(viewport_extent.left, viewport_extent.top, viewport_extent.GetWidth(), viewport_extent.GetHeight()); // TODO: Or bottom? + auto layout = render_window->GetFramebufferLayout(); + + glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); glUseProgram(program_id); // Set projection matrix - std::array<GLfloat, 3*2> ortho_matrix = MakeOrthographicMatrix((float)resolution_width, (float)resolution_height); + std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, + (float)layout.height); glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data()); // Bind texture in Texture Unit 0 glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - const float max_width = std::max((float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenBottomWidth); - const float top_x = 0.5f * (max_width - VideoCore::kScreenTopWidth); - const float bottom_x = 0.5f * (max_width - VideoCore::kScreenBottomWidth); - - DrawSingleScreenRotated(textures[0], top_x, 0, - (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight); - DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight, - (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight); + DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, + (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); + DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, + (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); m_current_frame++; } @@ -280,34 +290,6 @@ void RendererOpenGL::SetWindow(EmuWindow* window) { render_window = window; } -MathUtil::Rectangle<unsigned> RendererOpenGL::GetViewportExtent() { - unsigned framebuffer_width; - unsigned framebuffer_height; - std::tie(framebuffer_width, framebuffer_height) = render_window->GetFramebufferSize(); - - float window_aspect_ratio = static_cast<float>(framebuffer_height) / framebuffer_width; - float emulation_aspect_ratio = static_cast<float>(resolution_height) / resolution_width; - - MathUtil::Rectangle<unsigned> viewport_extent; - if (window_aspect_ratio > emulation_aspect_ratio) { - // Window is narrower than the emulation content => apply borders to the top and bottom - unsigned viewport_height = static_cast<unsigned>(std::round(emulation_aspect_ratio * framebuffer_width)); - viewport_extent.left = 0; - viewport_extent.top = (framebuffer_height - viewport_height) / 2; - viewport_extent.right = viewport_extent.left + framebuffer_width; - viewport_extent.bottom = viewport_extent.top + viewport_height; - } else { - // Otherwise, apply borders to the left and right sides of the window. - unsigned viewport_width = static_cast<unsigned>(std::round(framebuffer_height / emulation_aspect_ratio)); - viewport_extent.left = (framebuffer_width - viewport_width) / 2; - viewport_extent.top = 0; - viewport_extent.right = viewport_extent.left + viewport_width; - viewport_extent.bottom = viewport_extent.top + framebuffer_height; - } - - return viewport_extent; -} - /// Initialize the renderer void RendererOpenGL::Init() { render_window->MakeCurrent(); diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index bc8c0041c..4eb3e743e 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -17,6 +17,7 @@ #include "vertex_shader.h" #include "debug_utils/debug_utils.h" +using nihstro::OpCode; using nihstro::Instruction; using nihstro::RegisterType; using nihstro::SourceRegister; @@ -154,10 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) { } }; - switch (instr.opcode.GetInfo().type) { - case Instruction::OpCodeType::Arithmetic: + switch (instr.opcode.Value().GetInfo().type) { + case OpCode::Type::Arithmetic: { - bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); // TODO: We don't really support this properly: For instance, the address register // offset needs to be applied to SRC2 instead, etc. // For now, we just abort in this situation. @@ -197,15 +198,15 @@ static void ProcessShaderCode(VertexShaderState& state) { src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? dummy_vec4_float24 - : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()] + : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24 + : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode.EffectiveOpCode()) { - case Instruction::OpCode::ADD: + switch (instr.opcode.Value().EffectiveOpCode()) { + case OpCode::Id::ADD: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -217,7 +218,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MUL: + case OpCode::Id::MUL: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -229,7 +230,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MAX: + case OpCode::Id::MAX: for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -238,11 +239,11 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::DP3: - case Instruction::OpCode::DP4: + case OpCode::Id::DP3: + case OpCode::Id::DP4: { float24 dot = float24::FromFloat32(0.f); - int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; + int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) dot = dot + src1[i] * src2[i]; @@ -256,7 +257,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } // Reciprocal - case Instruction::OpCode::RCP: + case OpCode::Id::RCP: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -271,7 +272,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } // Reciprocal Square Root - case Instruction::OpCode::RSQ: + case OpCode::Id::RSQ: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -285,7 +286,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MOVA: + case OpCode::Id::MOVA: { for (int i = 0; i < 2; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -298,7 +299,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::MOV: + case OpCode::Id::MOV: { for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -309,7 +310,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::CMP: + case OpCode::Id::CMP: for (int i = 0; i < 2; ++i) { // TODO: Can you restrict to one compare via dest masking? @@ -350,7 +351,7 @@ static void ProcessShaderCode(VertexShaderState& state) { default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); DEBUG_ASSERT(false); break; } @@ -358,9 +359,9 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCodeType::MultiplyAdd: + case OpCode::Type::MultiplyAdd: { - if (instr.opcode.EffectiveOpCode() == Instruction::OpCode::MAD) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; const float24* src1_ = LookupSourceRegister(instr.mad.src1); @@ -408,9 +409,9 @@ static void ProcessShaderCode(VertexShaderState& state) { src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest < 0x08) ? state.output_register_table[4*instr.mad.dest.GetIndex()] - : (instr.mad.dest < 0x10) ? dummy_vec4_float24 - : (instr.mad.dest < 0x20) ? &state.temporary_registers[instr.mad.dest.GetIndex()][0] + float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()] + : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24 + : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; for (int i = 0; i < 4; ++i) { @@ -421,7 +422,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } } else { LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); } break; } @@ -448,31 +449,31 @@ static void ProcessShaderCode(VertexShaderState& state) { }; // Handle each instruction on its own - switch (instr.opcode) { - case Instruction::OpCode::END: + switch (instr.opcode.Value()) { + case OpCode::Id::END: exit_loop = true; break; - case Instruction::OpCode::JMPC: + case OpCode::Id::JMPC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; } break; - case Instruction::OpCode::JMPU: + case OpCode::Id::JMPU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; } break; - case Instruction::OpCode::CALL: + case OpCode::Id::CALL: call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, binary_offset + 1, 0, 0); break; - case Instruction::OpCode::CALLU: + case OpCode::Id::CALLU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, instr.flow_control.dest_offset, @@ -481,7 +482,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::CALLC: + case OpCode::Id::CALLC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { call(state, instr.flow_control.dest_offset, @@ -490,10 +491,10 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; - case Instruction::OpCode::NOP: + case OpCode::Id::NOP: break; - case Instruction::OpCode::IFU: + case OpCode::Id::IFU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, binary_offset + 1, @@ -508,7 +509,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; - case Instruction::OpCode::IFC: + case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? @@ -527,7 +528,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::LOOP: + case OpCode::Id::LOOP: { state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; @@ -542,7 +543,7 @@ static void ProcessShaderCode(VertexShaderState& state) { default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); break; } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 0a236595c..b9d4ede3a 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -18,7 +18,6 @@ namespace VideoCore { EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window RendererBase* g_renderer = nullptr; ///< Renderer plugin -int g_current_frame = 0; /// Initialize the video core void Init(EmuWindow* emu_window) { @@ -27,8 +26,6 @@ void Init(EmuWindow* emu_window) { g_renderer->SetWindow(g_emu_window); g_renderer->Init(); - g_current_frame = 0; - LOG_DEBUG(Render, "initialized OK"); } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index b782f17bd..1b51d39bf 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -30,7 +30,6 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height // --------------------- extern RendererBase* g_renderer; ///< Renderer plugin -extern int g_current_frame; ///< Current frame extern EmuWindow* g_emu_window; ///< Emu window /// Start the video core |