diff options
71 files changed, 13233 insertions, 1340 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 00d71dbdc..2ac94bc9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,7 +155,8 @@ IF (APPLE) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++") ELSEIF(MINGW) # GCC does not support codecvt, so use iconv instead - set(PLATFORM_LIBRARIES winmm ws2_32 iconv) + # PSAPI is the Process Status API + set(PLATFORM_LIBRARIES winmm ws2_32 psapi iconv) # WSAPoll functionality doesn't exist before WinNT 6.x (Vista and up) add_definitions(-D_WIN32_WINNT=0x0600) @@ -212,6 +213,7 @@ set(INI_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/externals/inih") include_directories(${INI_PREFIX}) add_subdirectory(${INI_PREFIX}) +include_directories(externals/microprofile) include_directories(externals/nihstro/include) if (MSVC) diff --git a/externals/microprofile/README.md b/externals/microprofile/README.md new file mode 100644 index 000000000..0a58d1c5a --- /dev/null +++ b/externals/microprofile/README.md @@ -0,0 +1,7 @@ +# microprofile + +MicroProfile is an embeddable profiler in a single file, written in C++ + +It can display profile information in the application, or by generating captures via a minimal built-in webserver. + +For more information see the project webpage at https://bitbucket.org/jonasmeyer/microprofile diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h new file mode 100644 index 000000000..d1ae0c1c2 --- /dev/null +++ b/externals/microprofile/microprofile.h @@ -0,0 +1,3571 @@ +#pragma once +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. +// In jurisdictions that recognize copyright laws, the author or authors +// of this software dedicate any and all copyright interest in the +// software to the public domain. 
We make this dedication for the benefit +// of the public at large and to the detriment of our heirs and +// successors. We intend this dedication to be an overt act of +// relinquishment in perpetuity of all present and future rights to this +// software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// For more information, please refer to <http://unlicense.org/> +// +// *********************************************************************** +// +// +// +// +// Howto: +// Call these functions from your code: +// MicroProfileOnThreadCreate +// MicroProfileMouseButton +// MicroProfileMousePosition +// MicroProfileModKey +// MicroProfileFlip <-- Call this once per frame +// MicroProfileDraw <-- Call this once per frame +// MicroProfileToggleDisplayMode <-- Bind to a key to toggle profiling +// MicroProfileTogglePause <-- Bind to a key to toggle pause +// +// Use these macros in your code in blocks you want to time: +// +// MICROPROFILE_DECLARE +// MICROPROFILE_DEFINE +// MICROPROFILE_DECLARE_GPU +// MICROPROFILE_DEFINE_GPU +// MICROPROFILE_SCOPE +// MICROPROFILE_SCOPEI +// MICROPROFILE_SCOPEGPU +// MICROPROFILE_SCOPEGPUI +// MICROPROFILE_META_CPU / MICROPROFILE_META_GPU +// +// +// Usage: +// +// { +// MICROPROFILE_SCOPEI("GroupName", "TimerName", nColorRgb); +// ..Code to be timed.. 
+// } +// +// MICROPROFILE_DECLARE / MICROPROFILE_DEFINE allows defining groups in a shared place, to ensure sorting of the timers +// +// (in global scope) +// MICROPROFILE_DEFINE(g_ProfileFisk, "Fisk", "Skalle", nSomeColorRgb); +// +// (in some other file) +// MICROPROFILE_DECLARE(g_ProfileFisk); +// +// void foo(){ +// MICROPROFILE_SCOPE(g_ProfileFisk); +// } +// +// Once code is instrumented the gui is activated by calling MicroProfileToggleDisplayMode or by clicking in the upper left corner of +// the screen +// +// The following functions must be implemented before the profiler is usable +// debug render: +// void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters); +// void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat); +// void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor); +// Gpu time stamps: (See below for d3d/opengl helper) +// uint32_t MicroProfileGpuInsertTimeStamp(); +// uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey); +// uint64_t MicroProfileTicksPerSecondGpu(); +// threading: +// const char* MicroProfileGetThreadName(); Threadnames in detailed view +// +// Default implementations of Gpu timestamp functions: +// Opengl: +// in .c file where MICROPROFILE_IMPL is defined: +// #define MICROPROFILE_GPU_TIMERS_GL 1 +// call MicroProfileGpuInitGL() on startup +// D3D11: +// in .c file where MICROPROFILE_IMPL is defined: +// #define MICROPROFILE_GPU_TIMERS_D3D11 1 +// call MicroProfileGpuInitD3D11(). Pass Device & ImmediateContext +// +// Limitations: +// GPU timestamps can only be inserted from one thread. 
+
+
+
+// Master switch. Define MICROPROFILE_ENABLED to 0 before including this header
+// to replace the whole API with the no-op macros below.
+#ifndef MICROPROFILE_ENABLED
+#define MICROPROFILE_ENABLED 1
+#endif
+
+#include <stdint.h>
+typedef uint64_t MicroProfileToken;
+typedef uint16_t MicroProfileGroupId;
+
+#if 0 == MICROPROFILE_ENABLED
+
+// Profiling disabled: instrumentation macros expand to nothing or to an empty
+// statement, and query-style calls expand to a constant.
+#define MICROPROFILE_DECLARE(var)
+#define MICROPROFILE_DEFINE(var, group, name, color)
+// Parameter names follow the enabled definition: (group, category, color).
+#define MICROPROFILE_REGISTER_GROUP(group, category, color)
+#define MICROPROFILE_DECLARE_GPU(var)
+#define MICROPROFILE_DEFINE_GPU(var, name, color)
+#define MICROPROFILE_SCOPE(var) do{}while(0)
+#define MICROPROFILE_SCOPEI(group, name, color) do{}while(0)
+#define MICROPROFILE_SCOPEGPU(var) do{}while(0)
+#define MICROPROFILE_SCOPEGPUI( name, color) do{}while(0)
+#define MICROPROFILE_META_CPU(name, count)
+#define MICROPROFILE_META_GPU(name, count)
+#define MICROPROFILE_FORCEENABLECPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEDISABLECPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEENABLEGPUGROUP(s) do{} while(0)
+#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) do{} while(0)
+#define MICROPROFILE_SCOPE_TOKEN(token)
+
+#define MicroProfileGetTime(group, name) 0.f
+#define MicroProfileOnThreadCreate(foo) do{}while(0)
+#define MicroProfileFlip() do{}while(0)
+#define MicroProfileSetAggregateFrames(a) do{}while(0)
+#define MicroProfileGetAggregateFrames() 0
+#define MicroProfileGetCurrentAggregateFrames() 0
+#define MicroProfileTogglePause() do{}while(0)
+#define MicroProfileToggleAllGroups() do{} while(0)
+#define MicroProfileDumpTimers() do{}while(0)
+#define MicroProfileShutdown() do{}while(0)
+#define MicroProfileSetForceEnable(a) do{} while(0)
+#define MicroProfileGetForceEnable() false
+#define MicroProfileSetEnableAllGroups(a) do{} while(0)
+#define MicroProfileEnableCategory(a) do{} while(0)
+#define MicroProfileDisableCategory(a) do{} while(0)
+#define MicroProfileGetEnableAllGroups() false
+#define MicroProfileSetForceMetaCounters(a)
+#define MicroProfileGetForceMetaCounters() 0
+#define MicroProfileEnableMetaCounter(c) do{}while(0)
+#define MicroProfileDisableMetaCounter(c) do{}while(0)
+#define MicroProfileDumpFile(html,csv) do{} while(0)
+#define MicroProfileWebServerPort() ((uint32_t)-1)
+
+#else
+
+#include <stdint.h>
+#include <string.h>
+#include <thread>
+#include <mutex>
+#include <atomic>
+
+#ifndef MICROPROFILE_API
+#define MICROPROFILE_API
+#endif
+
+MICROPROFILE_API int64_t MicroProfileTicksPerSecondCpu();
+
+
+// ---------------------------------------------------------------------------
+// Platform layer. Each branch supplies MP_TICK, MP_BREAK, MP_THREAD_LOCAL,
+// MP_STRCASECMP, MP_GETCURRENTTHREADID and ThreadIdType.
+// ---------------------------------------------------------------------------
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#include <unistd.h>
+#include <libkern/OSAtomic.h>
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#define MICROPROFILE_IOS
+#endif
+
+#define MP_TICK() mach_absolute_time()
+// Ticks per second derived from the mach timebase. The value is computed on
+// the first call and cached in a function-local static.
+inline int64_t MicroProfileTicksPerSecondCpu()
+{
+    static int64_t nTicksPerSecond = 0;
+    if(nTicksPerSecond == 0)
+    {
+        mach_timebase_info_data_t sTimebaseInfo;
+        mach_timebase_info(&sTimebaseInfo);
+        nTicksPerSecond = 1000000000ll * sTimebaseInfo.denom / sTimebaseInfo.numer;
+    }
+    return nTicksPerSecond;
+}
+// Returns the 64-bit id that pthread_threadid_np reports for the calling thread.
+inline uint64_t MicroProfileGetCurrentThreadId()
+{
+    uint64_t tid;
+    pthread_threadid_np(pthread_self(), &tid);
+    return tid;
+}
+
+#define MP_BREAK() __builtin_trap()
+#define MP_THREAD_LOCAL __thread
+#define MP_STRCASECMP strcasecmp
+#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
+typedef uint64_t ThreadIdType;
+#elif defined(_WIN32)
+// Only declared here; the definition lives in the MICROPROFILE_IMPL section.
+int64_t MicroProfileGetTick();
+#define MP_TICK() MicroProfileGetTick()
+#define MP_BREAK() __debugbreak()
+#define MP_THREAD_LOCAL __declspec(thread)
+#define MP_STRCASECMP _stricmp
+#define MP_GETCURRENTTHREADID() GetCurrentThreadId()
+typedef uint32_t ThreadIdType;
+
+#elif defined(__linux__)
+#include <unistd.h>
+#include <time.h>
+// Linux ticks are nanoseconds, so the tick rate is a constant 1e9.
+inline int64_t MicroProfileTicksPerSecondCpu()
+{
+    return 1000000000ll;
+}
+
+// Returns the current tick in nanoseconds.
+// CLOCK_MONOTONIC is used rather than CLOCK_REALTIME: the realtime clock can
+// be stepped (NTP, manual changes), which would make timestamps jump and
+// corrupt the durations computed from them.
+inline int64_t MicroProfileGetTick()
+{
+    timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return 1000000000ll * ts.tv_sec + ts.tv_nsec;
+}
+#define MP_TICK() MicroProfileGetTick()
+#define MP_BREAK() __builtin_trap()
+#define MP_THREAD_LOCAL __thread
+#define MP_STRCASECMP strcasecmp
+// Parenthesised so the cast cannot bind to neighbouring tokens at the use site.
+#define MP_GETCURRENTTHREADID() ((uint64_t)pthread_self())
+typedef uint64_t ThreadIdType;
+#endif
+
+
+// Fallback for platforms not handled above: every thread reports id 0.
+// NOTE(review): this fallback does not define MP_TICK / MP_BREAK /
+// MP_THREAD_LOCAL, so it is presumably not sufficient on its own - confirm.
+#ifndef MP_GETCURRENTTHREADID
+#define MP_GETCURRENTTHREADID() 0
+typedef uint32_t ThreadIdType;
+#endif
+
+
+// Traps through MP_BREAK when the condition is false. No NDEBUG gating is
+// applied here, so the check is evaluated in every build configuration.
+#define MP_ASSERT(a) do{if(!(a)){MP_BREAK();} }while(0)
+// DECLARE/DEFINE create a global token named g_mp_<var>. DEFINE goes in exactly
+// one translation unit, DECLARE wherever the token is referenced.
+#define MICROPROFILE_DECLARE(var) extern MicroProfileToken g_mp_##var
+#define MICROPROFILE_DEFINE(var, group, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_REGISTER_GROUP(group, category, color) MicroProfileRegisterGroup(group, category, color)
+#define MICROPROFILE_DECLARE_GPU(var) extern MicroProfileToken g_mp_##var
+#define MICROPROFILE_DEFINE_GPU(var, name, color) MicroProfileToken g_mp_##var = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu)
+// Two-level paste so that __LINE__ is expanded before concatenation.
+#define MICROPROFILE_TOKEN_PASTE0(a, b) a ## b
+#define MICROPROFILE_TOKEN_PASTE(a, b) MICROPROFILE_TOKEN_PASTE0(a,b)
+// Scope macros declare a handler object named foo<line> that times the
+// enclosing block. The *I variants also declare a function-local static token,
+// so they expand to two statements and must not be the sole body of an
+// unbraced if/for/while.
+#define MICROPROFILE_SCOPE(var) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
+#define MICROPROFILE_SCOPE_TOKEN(token) MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(token)
+#define MICROPROFILE_SCOPEI(group, name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken(group, name, color, MicroProfileTokenTypeCpu); MicroProfileScopeHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
+#define MICROPROFILE_SCOPEGPU(var) MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo, __LINE__)(g_mp_##var)
+#define MICROPROFILE_SCOPEGPUI(name, color) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__) = MicroProfileGetToken("GPU", name, color, MicroProfileTokenTypeGpu); MicroProfileScopeGpuHandler MICROPROFILE_TOKEN_PASTE(foo,__LINE__)( MICROPROFILE_TOKEN_PASTE(g_mp,__LINE__))
+#define MICROPROFILE_META_CPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_META_GPU(name, count) static MicroProfileToken MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__) = MicroProfileGetMetaToken(name); MicroProfileMetaUpdate(MICROPROFILE_TOKEN_PASTE(g_mp_meta,__LINE__), count, MicroProfileTokenTypeGpu)
+
+
+// ---------------------------------------------------------------------------
+// Tunables. Each can be overridden by defining it before including this header.
+// ---------------------------------------------------------------------------
+#ifndef MICROPROFILE_USE_THREAD_NAME_CALLBACK
+#define MICROPROFILE_USE_THREAD_NAME_CALLBACK 0
+#endif
+
+#ifndef MICROPROFILE_PER_THREAD_BUFFER_SIZE
+#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<10)
+#endif
+
+#ifndef MICROPROFILE_MAX_FRAME_HISTORY
+#define MICROPROFILE_MAX_FRAME_HISTORY 512
+#endif
+
+#ifndef MICROPROFILE_PRINTF
+#define MICROPROFILE_PRINTF printf
+#endif
+
+#ifndef MICROPROFILE_META_MAX
+#define MICROPROFILE_META_MAX 8
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_PORT
+#define MICROPROFILE_WEBSERVER_PORT 1338
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER
+#define MICROPROFILE_WEBSERVER 1
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_MAXFRAMES
+#define MICROPROFILE_WEBSERVER_MAXFRAMES 30
+#endif
+
+#ifndef MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE
+#define MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE (16<<10)
+#endif
+
+#ifndef MICROPROFILE_GPU_TIMERS
+#define MICROPROFILE_GPU_TIMERS 1
+#endif
+
+#ifndef MICROPROFILE_GPU_FRAME_DELAY
+#define MICROPROFILE_GPU_FRAME_DELAY 3 //must be > 0
+#endif
+
+
+#ifndef MICROPROFILE_NAME_MAX_LEN
+#define MICROPROFILE_NAME_MAX_LEN 64
+#endif
+
+#define MICROPROFILE_FORCEENABLECPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_FORCEDISABLECPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeCpu)
+#define MICROPROFILE_FORCEENABLEGPUGROUP(s) MicroProfileForceEnableGroup(s, MicroProfileTokenTypeGpu)
+#define MICROPROFILE_FORCEDISABLEGPUGROUP(s) MicroProfileForceDisableGroup(s, MicroProfileTokenTypeGpu)
+
+#define MICROPROFILE_INVALID_TICK ((uint64_t)-1)
+// 48 set bits: one bit per group.
+#define MICROPROFILE_GROUP_MASK_ALL 0xffffffffffff
+
+
+// Parenthesised like MICROPROFILE_INVALID_TICK so the expansion is a single
+// primary expression wherever it is used.
+#define MICROPROFILE_INVALID_TOKEN ((uint64_t)-1)
+
+enum MicroProfileTokenType
+{
+    MicroProfileTokenTypeCpu,
+    MicroProfileTokenTypeGpu,
+};
+
+enum MicroProfileBoxType
+{
+    MicroProfileBoxTypeBar,
+    MicroProfileBoxTypeFlat,
+};
+
+
+
+struct MicroProfile;
+
+MICROPROFILE_API void MicroProfileInit();
+MICROPROFILE_API void MicroProfileShutdown();
+MICROPROFILE_API MicroProfileToken MicroProfileFindToken(const char* sGroup, const char* sName);
+MICROPROFILE_API MicroProfileToken MicroProfileGetToken(const char* sGroup, const char* sName, uint32_t nColor, MicroProfileTokenType Token = MicroProfileTokenTypeCpu);
+MICROPROFILE_API MicroProfileToken MicroProfileGetMetaToken(const char* pName);
+MICROPROFILE_API void MicroProfileMetaUpdate(MicroProfileToken, int nCount, MicroProfileTokenType eTokenType);
+MICROPROFILE_API uint64_t MicroProfileEnter(MicroProfileToken nToken);
+MICROPROFILE_API void MicroProfileLeave(MicroProfileToken nToken, uint64_t nTick);
+MICROPROFILE_API uint64_t MicroProfileGpuEnter(MicroProfileToken nToken);
+MICROPROFILE_API void MicroProfileGpuLeave(MicroProfileToken nToken, uint64_t nTick);
+// Token layout: timer index in the low 16 bits, 48-bit group mask above it.
+inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t){ return (t&0xffff); }
+inline uint64_t MicroProfileGetGroupMask(MicroProfileToken t){ return ((t>>16)&MICROPROFILE_GROUP_MASK_ALL);}
+inline MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint16_t nTimer){ return (nGroupMask<<16) | nTimer;}
+
+MICROPROFILE_API void MicroProfileFlip(); //! call once per frame.
+MICROPROFILE_API void MicroProfileTogglePause();
+MICROPROFILE_API void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type);
+MICROPROFILE_API void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type);
+MICROPROFILE_API float MicroProfileGetTime(const char* pGroup, const char* pName);
+MICROPROFILE_API void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu);
+MICROPROFILE_API void MicroProfileOnThreadCreate(const char* pThreadName); //should be called from newly created threads
+MICROPROFILE_API void MicroProfileOnThreadExit(); //call on exit to reuse log
+MICROPROFILE_API void MicroProfileInitThreadLog();
+MICROPROFILE_API void MicroProfileSetForceEnable(bool bForceEnable);
+MICROPROFILE_API bool MicroProfileGetForceEnable();
+MICROPROFILE_API void MicroProfileSetEnableAllGroups(bool bEnable);
+MICROPROFILE_API void MicroProfileEnableCategory(const char* pCategory);
+MICROPROFILE_API void MicroProfileDisableCategory(const char* pCategory);
+MICROPROFILE_API bool MicroProfileGetEnableAllGroups();
+MICROPROFILE_API void MicroProfileSetForceMetaCounters(bool bEnable);
+MICROPROFILE_API bool MicroProfileGetForceMetaCounters();
+MICROPROFILE_API void MicroProfileEnableMetaCounter(const char* pMet);
+MICROPROFILE_API void MicroProfileDisableMetaCounter(const char* pMet);
+MICROPROFILE_API void MicroProfileSetAggregateFrames(int frames);
+MICROPROFILE_API int MicroProfileGetAggregateFrames();
+MICROPROFILE_API int MicroProfileGetCurrentAggregateFrames();
+MICROPROFILE_API MicroProfile* MicroProfileGet();
+MICROPROFILE_API void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]);
+MICROPROFILE_API std::recursive_mutex& MicroProfileGetMutex();
+MICROPROFILE_API void MicroProfileStartContextSwitchTrace();
+MICROPROFILE_API void MicroProfileStopContextSwitchTrace();
+MICROPROFILE_API bool MicroProfileIsLocalThread(uint32_t nThreadId);
+
+
+#if MICROPROFILE_WEBSERVER
+MICROPROFILE_API void MicroProfileDumpFile(const char* pHtml, const char* pCsv);
+MICROPROFILE_API uint32_t MicroProfileWebServerPort();
+#else
+// The stub takes the same two arguments as the real function so that call
+// sites compile unchanged when the web server is compiled out.
+#define MicroProfileDumpFile(html,csv) do{} while(0)
+#define MicroProfileWebServerPort() ((uint32_t)-1)
+#endif
+
+
+
+
+// GPU timestamp hooks. With MICROPROFILE_GPU_TIMERS disabled they collapse to
+// constants.
+#if MICROPROFILE_GPU_TIMERS
+MICROPROFILE_API uint32_t MicroProfileGpuInsertTimeStamp();
+MICROPROFILE_API uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey);
+MICROPROFILE_API uint64_t MicroProfileTicksPerSecondGpu();
+MICROPROFILE_API int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu);
+#else
+#define MicroProfileGpuInsertTimeStamp() 1
+#define MicroProfileGpuGetTimeStamp(a) 0
+#define MicroProfileTicksPerSecondGpu() 1
+#define MicroProfileGetGpuTickReference(a,b) 0
+#endif
+
+// The two macros below are tested with #if, so they must be defined to a
+// value (e.g. 1) rather than defined empty.
+#if MICROPROFILE_GPU_TIMERS_D3D11
+#define MICROPROFILE_D3D_MAX_QUERIES (8<<10)
+MICROPROFILE_API void MicroProfileGpuInitD3D11(void* pDevice, void* pDeviceContext);
+#endif
+
+#if MICROPROFILE_GPU_TIMERS_GL
+#define MICROPROFILE_GL_MAX_QUERIES (8<<10)
+MICROPROFILE_API void MicroProfileGpuInitGL();
+#endif
+
+
+
+#if MICROPROFILE_USE_THREAD_NAME_CALLBACK
+MICROPROFILE_API const char* MicroProfileGetThreadName();
+#else
+#define MicroProfileGetThreadName() "<implement MicroProfileGetThreadName to get threadnames>"
+#endif
+
+#if !defined(MICROPROFILE_THREAD_NAME_FROM_ID)
+#define MICROPROFILE_THREAD_NAME_FROM_ID(a) ""
+#endif
+
+
+// Scope timer for CPU tokens: calls MicroProfileEnter on construction and
+// MicroProfileLeave with the returned tick on destruction.
+struct MicroProfileScopeHandler
+{
+    MicroProfileToken nToken;
+    uint64_t nTick;
+    MicroProfileScopeHandler(MicroProfileToken Token):nToken(Token)
+    {
+        nTick = MicroProfileEnter(nToken);
+    }
+    ~MicroProfileScopeHandler()
+    {
+        MicroProfileLeave(nToken, nTick);
+    }
+};
+
+// GPU counterpart of MicroProfileScopeHandler, using MicroProfileGpuEnter and
+// MicroProfileGpuLeave.
+struct MicroProfileScopeGpuHandler
+{
+    MicroProfileToken nToken;
+    uint64_t nTick;
+    MicroProfileScopeGpuHandler(MicroProfileToken Token):nToken(Token)
+    {
+        nTick = MicroProfileGpuEnter(nToken);
+    }
+    ~MicroProfileScopeGpuHandler()
+    {
+        MicroProfileGpuLeave(nToken, nTick);
+    }
+};
+
+
+
+#define MICROPROFILE_MAX_TIMERS 1024
+#define MICROPROFILE_MAX_GROUPS 48 //dont bump! no. of bits used in bitmask
+#define MICROPROFILE_MAX_CATEGORIES 16
+#define MICROPROFILE_MAX_GRAPHS 5
+#define MICROPROFILE_GRAPH_HISTORY 128
+#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE)/sizeof(MicroProfileLogEntry))
+#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
+#define MICROPROFILE_STACK_MAX 32
+//#define MICROPROFILE_MAX_PRESETS 5
+#define MICROPROFILE_ANIM_DELAY_PRC 0.5f
+#define MICROPROFILE_GAP_TIME 50 //extra ms to fetch to close timers from earlier frames
+
+
+#ifndef MICROPROFILE_MAX_THREADS
+#define MICROPROFILE_MAX_THREADS 32
+#endif
+
+// Colour channel extraction. These only shift; the result is not masked to
+// 8 bits here.
+#ifndef MICROPROFILE_UNPACK_RED
+#define MICROPROFILE_UNPACK_RED(c) ((c)>>16)
+#endif
+
+#ifndef MICROPROFILE_UNPACK_GREEN
+#define MICROPROFILE_UNPACK_GREEN(c) ((c)>>8)
+#endif
+
+#ifndef MICROPROFILE_UNPACK_BLUE
+#define MICROPROFILE_UNPACK_BLUE(c) ((c))
+#endif
+
+#ifndef MICROPROFILE_DEFAULT_PRESET
+#define MICROPROFILE_DEFAULT_PRESET "Default"
+#endif
+
+
+#ifndef MICROPROFILE_CONTEXT_SWITCH_TRACE
+#if defined(_WIN32)
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 1
+#elif defined(__APPLE__)
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 //disabled until dtrace script is working.
+#else
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
+#endif
+#endif
+
+#if MICROPROFILE_CONTEXT_SWITCH_TRACE
+#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (128*1024) //2mb with 16 byte entry size
+#else
+#define MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE (1)
+#endif
+
+#ifndef MICROPROFILE_MINIZ
+#define MICROPROFILE_MINIZ 0
+#endif
+
+#ifdef _WIN32
+#include <basetsd.h>
+typedef UINT_PTR MpSocket;
+#else
+typedef int MpSocket;
+#endif
+
+
+// NOTE(review): on _WIN32 this uses HANDLE, but <windows.h> is only included
+// further down under MICROPROFILE_IMPL - presumably the including file is
+// expected to provide it; confirm.
+#if defined(__APPLE__) || defined(__linux__)
+typedef pthread_t MicroProfileThread;
+#elif defined(_WIN32)
+typedef HANDLE MicroProfileThread;
+#else
+typedef std::thread* MicroProfileThread;
+#endif
+
+
+
+enum MicroProfileDrawMask
+{
+    MP_DRAW_OFF = 0x0,
+    MP_DRAW_BARS = 0x1,
+    MP_DRAW_DETAILED = 0x2,
+    MP_DRAW_HIDDEN = 0x3,
+};
+
+enum MicroProfileDrawBarsMask
+{
+    MP_DRAW_TIMERS = 0x1,
+    MP_DRAW_AVERAGE = 0x2,
+    MP_DRAW_MAX = 0x4,
+    MP_DRAW_CALL_COUNT = 0x8,
+    MP_DRAW_TIMERS_EXCLUSIVE = 0x10,
+    MP_DRAW_AVERAGE_EXCLUSIVE = 0x20,
+    MP_DRAW_MAX_EXCLUSIVE = 0x40,
+    MP_DRAW_META_FIRST = 0x80,
+    MP_DRAW_ALL = 0xffffffff,
+
+};
+
+// One packed 64-bit log record; see the MP_LOG_* masks below for the layout.
+typedef uint64_t MicroProfileLogEntry;
+
+struct MicroProfileTimer
+{
+    uint64_t nTicks;
+    uint32_t nCount;
+};
+
+struct MicroProfileCategory
+{
+    char pName[MICROPROFILE_NAME_MAX_LEN];
+    uint64_t nGroupMask;
+};
+
+struct MicroProfileGroupInfo
+{
+    char pName[MICROPROFILE_NAME_MAX_LEN];
+    uint32_t nNameLen;
+    uint32_t nGroupIndex;
+    uint32_t nNumTimers;
+    uint32_t nMaxTimerNameLen;
+    uint32_t nColor;
+    uint32_t nCategory;
+    MicroProfileTokenType Type;
+};
+
+struct MicroProfileTimerInfo
+{
+    MicroProfileToken nToken;
+    uint32_t nTimerIndex;
+    uint32_t nGroupIndex;
+    char pName[MICROPROFILE_NAME_MAX_LEN];
+    uint32_t nNameLen;
+    uint32_t nColor;
+    bool bGraph;
+};
+
+struct MicroProfileGraphState
+{
+    int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
+    MicroProfileToken nToken;
+    int32_t nKey;
+};
+
+// One context-switch record: outgoing/incoming thread ids plus an 8-bit CPU
+// number and a 56-bit tick packed into a single int64_t.
+struct MicroProfileContextSwitch
+{
+    ThreadIdType nThreadOut;
+    ThreadIdType nThreadIn;
+    int64_t nCpu : 8;
+    int64_t nTicks : 56;
+};
+
+
+struct MicroProfileFrameState
+{
+    int64_t nFrameStartCpu;
+    int64_t nFrameStartGpu;
+    uint32_t nLogStart[MICROPROFILE_MAX_THREADS];
+};
+
+// Per-thread event log: a buffer of packed entries addressed by the atomic
+// nPut/nGet indices, plus the per-thread timer stack state.
+struct MicroProfileThreadLog
+{
+    MicroProfileLogEntry Log[MICROPROFILE_BUFFER_SIZE];
+
+    std::atomic<uint32_t> nPut;
+    std::atomic<uint32_t> nGet;
+    uint32_t nActive;
+    uint32_t nGpu;
+    ThreadIdType nThreadId;
+
+    uint32_t nStack[MICROPROFILE_STACK_MAX];
+    int64_t nChildTickStack[MICROPROFILE_STACK_MAX];
+    uint32_t nStackPos;
+
+
+    uint8_t nGroupStackPos[MICROPROFILE_MAX_GROUPS];
+    int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS];
+    int64_t nAggregateGroupTicks[MICROPROFILE_MAX_GROUPS];
+    enum
+    {
+        THREAD_MAX_LEN = 64,
+    };
+    // Sized by the constant declared above (same value as the former literal 64).
+    char ThreadName[THREAD_MAX_LEN];
+    int nFreeListNext;
+};
+
+#if MICROPROFILE_GPU_TIMERS_D3D11
+struct MicroProfileD3D11Frame
+{
+    uint32_t m_nQueryStart;
+    uint32_t m_nQueryCount;
+    uint32_t m_nRateQueryStarted;
+    void* m_pRateQuery;
+};
+
+struct MicroProfileGpuTimerState
+{
+    uint32_t bInitialized;
+    void* m_pDevice;
+    void* m_pDeviceContext;
+    void* m_pQueries[MICROPROFILE_D3D_MAX_QUERIES];
+    int64_t m_nQueryResults[MICROPROFILE_D3D_MAX_QUERIES];
+    uint32_t m_nQueryPut;
+    uint32_t m_nQueryGet;
+    uint32_t m_nQueryFrame;
+    int64_t m_nQueryFrequency;
+    MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
+};
+#elif MICROPROFILE_GPU_TIMERS_GL
+struct MicroProfileGpuTimerState
+{
+    uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
+    uint32_t GLTimerPos;
+};
+#else
+struct MicroProfileGpuTimerState{};
+#endif
+
+// Global profiler state. The single instance is g_MicroProfile (aliased as S
+// in the implementation section); MicroProfileInit zero-fills it with memset,
+// so it must remain safe to initialise that way.
+struct MicroProfile
+{
+    uint32_t nTotalTimers;
+    uint32_t nGroupCount;
+    uint32_t nCategoryCount;
+    uint32_t nAggregateClear;
+    uint32_t nAggregateFlip;
+    uint32_t nAggregateFlipCount;
+    uint32_t nAggregateFrames;
+
+    uint64_t nAggregateFlipTick;
+
+    uint32_t nDisplay;
+    uint32_t nBars;
+    uint64_t nActiveGroup;
+    uint32_t nActiveBars;
+
+    uint64_t nForceGroup;
+    uint32_t nForceEnable;
+    uint32_t nForceMetaCounters;
+
+    uint64_t nForceGroupUI;
+    uint64_t nActiveGroupWanted;
+    uint32_t nAllGroupsWanted;
+    uint32_t nAllThreadsWanted;
+
+    uint32_t nOverflow;
+
+    uint64_t nGroupMask;
+    uint32_t nRunning;
+    uint32_t nToggleRunning;
+    uint32_t nMaxGroupSize;
+    uint32_t nDumpFileNextFrame;
+    uint32_t nAutoClearFrames;
+    char HtmlDumpPath[512];
+    char CsvDumpPath[512];
+
+    int64_t nPauseTicks;
+
+    float fReferenceTime;
+    float fRcpReferenceTime;
+
+    MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
+    MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS];
+    MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS];
+    uint8_t TimerToGroup[MICROPROFILE_MAX_TIMERS];
+
+    MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS];
+    uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
+    uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
+    uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];
+
+    MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS];
+    uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS];
+
+    MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS];
+    uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS];
+    uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS];
+    uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];
+
+
+    uint64_t FrameGroup[MICROPROFILE_MAX_GROUPS];
+    uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS];
+    uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS];
+
+    uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS];
+    uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS];
+
+
+    struct
+    {
+        uint64_t nCounters[MICROPROFILE_MAX_TIMERS];
+
+        uint64_t nAccum[MICROPROFILE_MAX_TIMERS];
+        uint64_t nAccumMax[MICROPROFILE_MAX_TIMERS];
+
+        uint64_t nAggregate[MICROPROFILE_MAX_TIMERS];
+        uint64_t nAggregateMax[MICROPROFILE_MAX_TIMERS];
+
+        uint64_t nSum;
+        uint64_t nSumAccum;
+        uint64_t nSumAccumMax;
+        uint64_t nSumAggregate;
+        uint64_t nSumAggregateMax;
+
+        const char* pName;
+    } MetaCounters[MICROPROFILE_META_MAX];
+
+    MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS];
+    uint32_t nGraphPut;
+
+    uint32_t nThreadActive[MICROPROFILE_MAX_THREADS];
+    MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS];
+    uint32_t nNumLogs;
+    uint32_t nMemUsage;
+    int nFreeListHead;
+
+    uint32_t nFrameCurrent;
+    uint32_t nFrameCurrentIndex;
+    uint32_t nFramePut;
+    uint64_t nFramePutIndex;
+
+    MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];
+
+    uint64_t nFlipTicks;
+    uint64_t nFlipAggregate;
+    uint64_t nFlipMax;
+    uint64_t nFlipAggregateDisplay;
+    uint64_t nFlipMaxDisplay;
+
+    MicroProfileThread ContextSwitchThread;
+    bool bContextSwitchRunning;
+    bool bContextSwitchStop;
+    bool bContextSwitchAllThreads;
+    bool bContextSwitchNoBars;
+    uint32_t nContextSwitchUsage;
+    uint32_t nContextSwitchLastPut;
+
+    int64_t nContextSwitchHoverTickIn;
+    int64_t nContextSwitchHoverTickOut;
+    uint32_t nContextSwitchHoverThread;
+    uint32_t nContextSwitchHoverThreadBefore;
+    uint32_t nContextSwitchHoverThreadAfter;
+    uint8_t nContextSwitchHoverCpu;
+    uint8_t nContextSwitchHoverCpuNext;
+
+    uint32_t nContextSwitchPut;
+    MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];
+
+
+    MpSocket ListenerSocket;
+    uint32_t nWebServerPort;
+
+    char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
+    uint32_t WebServerPut;
+
+    uint64_t nWebServerDataSent;
+
+    MicroProfileGpuTimerState GPU;
+
+
+};
+
+// Log entry layout (64 bits):
+//   bits  0..47  tick         (MP_LOG_TICK_MASK)
+//   bits 48..61  timer index  (MP_LOG_INDEX_MASK, 14 bits)
+//   bits 62..63  entry type   (MP_LOG_BEGIN_MASK: one of the MP_LOG_* values below)
+#define MP_LOG_TICK_MASK 0x0000ffffffffffff
+#define MP_LOG_INDEX_MASK 0x3fff000000000000
+#define MP_LOG_BEGIN_MASK 0xc000000000000000
+#define MP_LOG_GPU_EXTRA 0x3
+#define MP_LOG_META 0x2
+#define MP_LOG_ENTER 0x1
+#define MP_LOG_LEAVE 0x0
+
+
+// Extracts the 2-bit entry type from a log entry.
+inline int MicroProfileLogType(MicroProfileLogEntry Index)
+{
+    return ((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3;
+}
+
+// Extracts the 14-bit timer index from a log entry.
+inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index)
+{
+    return (0x3fff&(Index>>48));
+}
+
+// Packs type, timer index (low 14 bits of nToken) and tick (low 48 bits) into
+// one entry, then asserts that type and index read back unchanged.
+inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
+{
+    MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick);
+    int t = MicroProfileLogType(Entry);
+    uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry);
+    MP_ASSERT(t == nBegin);
+    MP_ASSERT(nTimerIndex == (nToken&0x3fff));
+    return Entry;
+
+}
+
+// Signed difference End - Start of the 48-bit tick fields. Both values are
+// shifted left by 16 to drop the type/index bits, subtracted, and shifted back
+// so the result is sign-extended (relies on arithmetic right shift of a
+// negative int64_t).
+inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
+{
+    uint64_t nStart = Start;
+    uint64_t nEnd = End;
+    int64_t nDifference = ((nEnd<<16) - (nStart<<16));
+    return nDifference >> 16;
+}
+
+// Returns the 48-bit tick field of an entry.
+inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
+{
+    return MP_LOG_TICK_MASK & e;
+}
+
+// Returns e with its tick field replaced by the low 48 bits of nTick.
+inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
+{
+    return (MP_LOG_TICK_MASK & nTick) | (e & ~MP_LOG_TICK_MASK);
+}
+
+template<typename T>
+T MicroProfileMin(T a, T b)
+{ return a < b ? a : b; }
+
+template<typename T>
+T MicroProfileMax(T a, T b)
+{ return a > b ? a : b; }
+
+// Converts milliseconds to ticks. The product is evaluated in float.
+inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
+{
+    return (int64_t)(fMs*0.001f*nTicksPerSecond);
+}
+
+// Factor that converts a tick count to milliseconds.
+inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
+{
+    return 1000.f / nTicksPerSecond;
+}
+
+// Looks up the group index of the timer encoded in token t.
+inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
+{
+    return (uint16_t)MicroProfileGet()->TimerToGroup[MicroProfileGetTimerIndex(t)];
+}
+
+
+
+#ifdef MICROPROFILE_IMPL
+
+#ifdef _WIN32
+#include <windows.h>
+// NOTE(review): redefining snprintf changes truncation/termination semantics
+// and can conflict with newer C runtimes that provide snprintf - confirm this
+// is still wanted for the supported toolchains.
+#define snprintf _snprintf
+
+// NOTE(review): the matching "#pragma warning(pop)" is not visible in this
+// part of the file - confirm it exists further down.
+#pragma warning(push)
+#pragma warning(disable: 4244)
+// Performance-counter frequency, queried on first use and cached.
+int64_t MicroProfileTicksPerSecondCpu()
+{
+    static int64_t nTicksPerSecond = 0;
+    if(nTicksPerSecond == 0)
+    {
+        QueryPerformanceFrequency((LARGE_INTEGER*)&nTicksPerSecond);
+    }
+    return nTicksPerSecond;
+}
+int64_t MicroProfileGetTick()
+{
+    int64_t ticks;
+    QueryPerformanceCounter((LARGE_INTEGER*)&ticks);
+    return ticks;
+}
+
+#endif
+
+// NOTE(review): both macros are given a 0/1 default earlier in this header,
+// so defined(...) looks always true here and the helpers are compiled even
+// when both features are 0. A value test may have been intended - confirm
+// before changing, since code outside this view may rely on the helpers.
+#if defined(MICROPROFILE_WEBSERVER) || defined(MICROPROFILE_CONTEXT_SWITCH_TRACE)
+
+
+typedef void* (*MicroProfileThreadFunc)(void*);
+
+#if defined(__APPLE__) || defined(__linux__)
+typedef pthread_t MicroProfileThread;
+// Starts Func on a new joinable thread and stores its handle in *pThread.
+void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    pthread_attr_t Attr;
+    int r = pthread_attr_init(&Attr);
+    MP_ASSERT(r == 0);
+    // A failed create would leave *pThread unset and make the later join
+    // undefined, so it is asserted in the same way as the join result.
+    r = pthread_create(pThread, &Attr, Func, 0);
+    MP_ASSERT(r == 0);
+    // The attribute object is only needed while the thread is being created.
+    pthread_attr_destroy(&Attr);
+}
+void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    int r = pthread_join(*pThread, 0);
+    MP_ASSERT(r == 0);
+}
+#elif defined(_WIN32)
+typedef HANDLE MicroProfileThread;
+// Adapts the void*(void*) thread function to the CreateThread entry signature.
+DWORD _stdcall ThreadTrampoline(void* pFunc)
+{
+    MicroProfileThreadFunc F = (MicroProfileThreadFunc)pFunc;
+    return (uint32_t)F(0);
+}
+
+void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    *pThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
+}
+void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    WaitForSingleObject(*pThread, INFINITE);
+    CloseHandle(*pThread);
+}
+#else
+#include <thread>
+typedef std::thread* MicroProfileThread;
+inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
+{
+    *pThread = new std::thread(Func, nullptr);
+}
+inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
+{
+    (*pThread)->join();
+    delete *pThread;
+}
+#endif
+#endif
+
+#if MICROPROFILE_WEBSERVER
+
+#ifdef _WIN32
+#define MP_INVALID_SOCKET(f) ((f) == INVALID_SOCKET)
+#endif
+
+// NOTE(review): only Apple gets the socket headers and MP_INVALID_SOCKET in
+// this spot; presumably other POSIX platforms are handled elsewhere in the
+// file - confirm.
+#if defined(__APPLE__)
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <fcntl.h>
+#define MP_INVALID_SOCKET(f) ((f) < 0)
+#endif
+
+
+void MicroProfileWebServerStart();
+void MicroProfileWebServerStop();
+bool MicroProfileWebServerUpdate();
+void MicroProfileDumpToFile();
+
+#else
+
+#define MicroProfileWebServerStart() do{}while(0)
+#define MicroProfileWebServerStop() do{}while(0)
+#define MicroProfileWebServerUpdate() false
+#define MicroProfileDumpToFile() do{} while(0)
+#endif
+
+
+#if MICROPROFILE_GPU_TIMERS_D3D11
+void MicroProfileGpuFlip();
+void MicroProfileGpuShutdown();
+#else
+#define MicroProfileGpuFlip() do{}while(0)
+#define MicroProfileGpuShutdown() do{}while(0)
+#endif
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <algorithm>
+
+
+#ifndef MICROPROFILE_DEBUG
+#define MICROPROFILE_DEBUG 0
+#endif
+
+
+// Shorthand for the global state, used throughout the implementation.
+#define S g_MicroProfile
+
+MicroProfile g_MicroProfile;
+MicroProfileThreadLog* g_MicroProfileGpuLog = 0;
+#ifdef MICROPROFILE_IOS
+// iOS doesn't support __thread
+static pthread_key_t g_MicroProfileThreadLogKey;
+static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
+static void MicroProfileCreateThreadLogKey()
+{
+    pthread_key_create(&g_MicroProfileThreadLogKey, NULL);
+}
+#else
+MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLog = 0;
+#endif
+static bool g_bUseLock = false; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled)
+
+
+MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", 0x3355ee);
+MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch,"MicroProfile", "ContextSwitchSearch", 0xDD7300);
+
+// The profiler mutex is a function-local static, constructed on first call.
+inline std::recursive_mutex& MicroProfileMutex()
+{
+    static std::recursive_mutex Mutex;
+    return Mutex;
+}
+std::recursive_mutex& MicroProfileGetMutex()
+{
+    return MicroProfileMutex();
+}
+
+MICROPROFILE_API MicroProfile* MicroProfileGet()
+{
+    return &g_MicroProfile;
+}
+
+
+MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
+
+
+// One-time initialisation of the global state S. Later calls do nothing.
+// The mutex is taken only when g_bUseLock is set (see the comment on that
+// flag). The first call zero-fills S, sets the defaults, and creates the GPU
+// thread log, which must land in pool slot 0.
+void MicroProfileInit()
+{
+    std::recursive_mutex& mutex = MicroProfileMutex();
+    bool bUseLock = g_bUseLock;
+    if(bUseLock)
+        mutex.lock();
+    static bool bOnce = true;
+    if(bOnce)
+    {
+        bOnce = false;
+        memset(&S, 0, sizeof(S));
+        // Must come after the memset above, which would otherwise reset the
+        // counter and drop the size of the global state from the total.
+        S.nMemUsage += sizeof(S);
+        for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
+        {
+            S.GroupInfo[i].pName[0] = '\0';
+        }
+        for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
+        {
+            S.CategoryInfo[i].pName[0] = '\0';
+            S.CategoryInfo[i].nGroupMask = 0;
+        }
+        // Category 0 always exists and is named "default".
+        strcpy(&S.CategoryInfo[0].pName[0], "default");
+        S.nCategoryCount = 1;
+        for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
+        {
+            S.TimerInfo[i].pName[0] = '\0';
+        }
+        S.nGroupCount = 0;
+        S.nAggregateFlipTick = MP_TICK();
+        S.nActiveGroup = 0;
+        S.nActiveBars = 0;
+        S.nForceGroup = 0;
+        S.nAllGroupsWanted = 0;
+        S.nActiveGroupWanted = 0;
+        S.nAllThreadsWanted = 1;
+        S.nAggregateFlip = 0;
+        S.nTotalTimers = 0;
+        for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
+        {
+            S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
+        }
+        S.nRunning = 1;
+        S.fReferenceTime = 33.33f;
+        S.fRcpReferenceTime = 1.f / S.fReferenceTime;
+        S.nFreeListHead = -1;
+        int64_t nTick = MP_TICK();
+        for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
+        {
+            S.Frames[i].nFrameStartCpu = nTick;
+            S.Frames[i].nFrameStartGpu = -1;
+        }
+
+        MicroProfileThreadLog* pGpu = MicroProfileCreateThreadLog("GPU");
+        g_MicroProfileGpuLog = pGpu;
+        MP_ASSERT(S.Pool[0] == pGpu);
+        pGpu->nGpu = 1;
+        pGpu->nThreadId = 0;
+
+        S.nWebServerDataSent = (uint64_t)-1;
+    }
+    if(bUseLock)
+        mutex.unlock();
+}
+
+// Stops the web server, the context-switch trace and the GPU timers while
+// holding the profiler mutex.
+void MicroProfileShutdown()
+{
+    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
+    MicroProfileWebServerStop();
+    MicroProfileStopContextSwitchTrace();
+    MicroProfileGpuShutdown();
+}
+
+#ifdef MICROPROFILE_IOS
+// iOS path: the per-thread log pointer lives in a pthread key that is created
+// once on first use.
+inline MicroProfileThreadLog* MicroProfileGetThreadLog()
+{
+    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
+    return (MicroProfileThreadLog*)pthread_getspecific(g_MicroProfileThreadLogKey);
+}
+
+inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
+{
+    pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
+    pthread_setspecific(g_MicroProfileThreadLogKey, pLog);
+}
+#else
+MicroProfileThreadLog* MicroProfileGetThreadLog()
+{
+    return g_MicroProfileThreadLog;
+}
+inline void 
MicroProfileSetThreadLog(MicroProfileThreadLog* pLog) +{ + g_MicroProfileThreadLog = pLog; +} +#endif + + +MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName) +{ + MicroProfileThreadLog* pLog = 0; + if(S.nFreeListHead != -1) + { + pLog = S.Pool[S.nFreeListHead]; + MP_ASSERT(pLog->nPut.load() == 0); + MP_ASSERT(pLog->nGet.load() == 0); + S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext; + } + else + { + pLog = new MicroProfileThreadLog; + S.nMemUsage += sizeof(MicroProfileThreadLog); + S.Pool[S.nNumLogs++] = pLog; + } + memset(pLog, 0, sizeof(*pLog)); + int len = (int)strlen(pName); + int maxlen = sizeof(pLog->ThreadName)-1; + len = len < maxlen ? len : maxlen; + memcpy(&pLog->ThreadName[0], pName, len); + pLog->ThreadName[len] = '\0'; + pLog->nThreadId = MP_GETCURRENTTHREADID(); + pLog->nFreeListNext = -1; + pLog->nActive = 1; + return pLog; +} + +void MicroProfileOnThreadCreate(const char* pThreadName) +{ + g_bUseLock = true; + MicroProfileInit(); + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + MP_ASSERT(MicroProfileGetThreadLog() == 0); + MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? 
pThreadName : MicroProfileGetThreadName()); + MP_ASSERT(pLog); + MicroProfileSetThreadLog(pLog); +} + +void MicroProfileOnThreadExit() +{ + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + MicroProfileThreadLog* pLog = MicroProfileGetThreadLog(); + if(pLog) + { + int32_t nLogIndex = -1; + for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + if(pLog == S.Pool[i]) + { + nLogIndex = i; + break; + } + } + MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS && nLogIndex > 0); + pLog->nFreeListNext = S.nFreeListHead; + pLog->nActive = 0; + pLog->nPut.store(0); + pLog->nGet.store(0); + S.nFreeListHead = nLogIndex; + for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i) + { + S.Frames[i].nLogStart[nLogIndex] = 0; + } + memset(pLog->nGroupStackPos, 0, sizeof(pLog->nGroupStackPos)); + memset(pLog->nGroupTicks, 0, sizeof(pLog->nGroupTicks)); + } +} + +void MicroProfileInitThreadLog() +{ + MicroProfileOnThreadCreate(nullptr); +} + + +struct MicroProfileScopeLock +{ + bool bUseLock; + std::recursive_mutex& m; + MicroProfileScopeLock(std::recursive_mutex& m) : bUseLock(g_bUseLock), m(m) + { + if(bUseLock) + m.lock(); + } + ~MicroProfileScopeLock() + { + if(bUseLock) + m.unlock(); + } +}; + +MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName) +{ + MicroProfileInit(); + MicroProfileScopeLock L(MicroProfileMutex()); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + if(!MP_STRCASECMP(pName, S.TimerInfo[i].pName) && !MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[i]].pName)) + { + return S.TimerInfo[i].nToken; + } + } + return MICROPROFILE_INVALID_TOKEN; +} + +uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type) +{ + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName)) + { + return i; + } + } + uint16_t nGroupIndex = 0xffff; + uint32_t nLen = (uint32_t)strlen(pGroup); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + 
memcpy(&S.GroupInfo[S.nGroupCount].pName[0], pGroup, nLen); + S.GroupInfo[S.nGroupCount].pName[nLen] = '\0'; + S.GroupInfo[S.nGroupCount].nNameLen = nLen; + S.GroupInfo[S.nGroupCount].nNumTimers = 0; + S.GroupInfo[S.nGroupCount].nGroupIndex = S.nGroupCount; + S.GroupInfo[S.nGroupCount].Type = Type; + S.GroupInfo[S.nGroupCount].nMaxTimerNameLen = 0; + S.GroupInfo[S.nGroupCount].nColor = 0x88888888; + S.GroupInfo[S.nGroupCount].nCategory = 0; + S.CategoryInfo[0].nGroupMask |= (1ll << (uint64_t)S.nGroupCount); + nGroupIndex = S.nGroupCount++; + S.nGroupMask = (S.nGroupMask<<1)|1; + MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS); + return nGroupIndex; +} + +void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor) +{ + int nCategoryIndex = -1; + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName)) + { + nCategoryIndex = (int)i; + break; + } + } + if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES) + { + MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0'); + nCategoryIndex = (int)S.nCategoryCount++; + uint32_t nLen = (uint32_t)strlen(pCategory); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen); + S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0'; + } + uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu")?MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu); + S.GroupInfo[nGroup].nColor = nColor; + if(nCategoryIndex >= 0) + { + uint64_t nBit = 1ll << nGroup; + uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory; + S.CategoryInfo[nOldCategory].nGroupMask &= ~nBit; + S.CategoryInfo[nCategoryIndex].nGroupMask |= nBit; + S.GroupInfo[nGroup].nCategory = nCategoryIndex; + } +} + +MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type) +{ + MicroProfileInit(); + 
MicroProfileScopeLock L(MicroProfileMutex()); + MicroProfileToken ret = MicroProfileFindToken(pGroup, pName); + if(ret != MICROPROFILE_INVALID_TOKEN) + return ret; + uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type); + uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++); + uint64_t nGroupMask = 1ll << nGroupIndex; + MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, nTimerIndex); + S.GroupInfo[nGroupIndex].nNumTimers++; + S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, (uint32_t)strlen(pName)); + MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); //dont mix cpu & gpu timers in the same group + S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers); + S.TimerInfo[nTimerIndex].nToken = nToken; + uint32_t nLen = (uint32_t)strlen(pName); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&S.TimerInfo[nTimerIndex].pName, pName, nLen); + S.TimerInfo[nTimerIndex].pName[nLen] = '\0'; + S.TimerInfo[nTimerIndex].nNameLen = nLen; + S.TimerInfo[nTimerIndex].nColor = nColor&0xffffff; + S.TimerInfo[nTimerIndex].nGroupIndex = nGroupIndex; + S.TimerInfo[nTimerIndex].nTimerIndex = nTimerIndex; + S.TimerToGroup[nTimerIndex] = nGroupIndex; + return nToken; +} + +MicroProfileToken MicroProfileGetMetaToken(const char* pName) +{ + MicroProfileInit(); + MicroProfileScopeLock L(MicroProfileMutex()); + for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(!S.MetaCounters[i].pName) + { + S.MetaCounters[i].pName = pName; + return i; + } + else if(!MP_STRCASECMP(pName, S.MetaCounters[i].pName)) + { + return i; + } + } + MP_ASSERT(0);//out of slots, increase MICROPROFILE_META_MAX + return (MicroProfileToken)-1; +} + + +inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog) +{ + MP_ASSERT(pLog != 0); //this assert is hit if MicroProfileOnCreateThread is not called + 
MP_ASSERT(pLog->nActive); + uint32_t nPos = pLog->nPut.load(std::memory_order_relaxed); + uint32_t nNextPos = (nPos+1) % MICROPROFILE_BUFFER_SIZE; + if(nNextPos == pLog->nGet.load(std::memory_order_relaxed)) + { + S.nOverflow = 100; + } + else + { + pLog->Log[nPos] = MicroProfileMakeLogIndex(nBegin, nToken_, nTick); + pLog->nPut.store(nNextPos, std::memory_order_release); + } +} + +uint64_t MicroProfileEnter(MicroProfileToken nToken_) +{ + if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup) + { + if(!MicroProfileGetThreadLog()) + { + MicroProfileInitThreadLog(); + } + uint64_t nTick = MP_TICK(); + MicroProfileLogPut(nToken_, nTick, MP_LOG_ENTER, MicroProfileGetThreadLog()); + return nTick; + } + return MICROPROFILE_INVALID_TICK; +} + +void MicroProfileMetaUpdate(MicroProfileToken nToken, int nCount, MicroProfileTokenType eTokenType) +{ + if((MP_DRAW_META_FIRST<<nToken) & S.nActiveBars) + { + MicroProfileThreadLog* pLog = MicroProfileTokenTypeCpu == eTokenType ? MicroProfileGetThreadLog() : g_MicroProfileGpuLog; + if(pLog) + { + MP_ASSERT(nToken < MICROPROFILE_META_MAX); + MicroProfileLogPut(nToken, nCount, MP_LOG_META, pLog); + } + } +} + + +void MicroProfileLeave(MicroProfileToken nToken_, uint64_t nTickStart) +{ + if(MICROPROFILE_INVALID_TICK != nTickStart) + { + if(!MicroProfileGetThreadLog()) + { + MicroProfileInitThreadLog(); + } + uint64_t nTick = MP_TICK(); + MicroProfileThreadLog* pLog = MicroProfileGetThreadLog(); + MicroProfileLogPut(nToken_, nTick, MP_LOG_LEAVE, pLog); + } +} + + +uint64_t MicroProfileGpuEnter(MicroProfileToken nToken_) +{ + if(MicroProfileGetGroupMask(nToken_) & S.nActiveGroup) + { + uint64_t nTimer = MicroProfileGpuInsertTimeStamp(); + MicroProfileLogPut(nToken_, nTimer, MP_LOG_ENTER, g_MicroProfileGpuLog); + MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog); + return 1; + } + return 0; +} + +void MicroProfileGpuLeave(MicroProfileToken nToken_, uint64_t nTickStart) +{ + if(nTickStart) + { + uint64_t 
nTimer = MicroProfileGpuInsertTimeStamp(); + MicroProfileLogPut(nToken_, nTimer, MP_LOG_LEAVE, g_MicroProfileGpuLog); + MicroProfileLogPut(nToken_, MP_TICK(), MP_LOG_GPU_EXTRA, g_MicroProfileGpuLog); + } +} + +void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch) +{ + if(S.nRunning || pContextSwitch->nTicks <= S.nPauseTicks) + { + uint32_t nPut = S.nContextSwitchPut; + S.ContextSwitch[nPut] = *pContextSwitch; + S.nContextSwitchPut = (S.nContextSwitchPut+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + } +} + + +void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2]) +{ + if(nPut > nGet) + { + nRange[0][0] = nGet; + nRange[0][1] = nPut; + nRange[1][0] = nRange[1][1] = 0; + } + else if(nPut != nGet) + { + MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE); + uint32_t nCountEnd = MICROPROFILE_BUFFER_SIZE - nGet; + nRange[0][0] = nGet; + nRange[0][1] = nGet + nCountEnd; + nRange[1][0] = 0; + nRange[1][1] = nPut; + } +} + +void MicroProfileFlip() +{ + #if 0 + //verify LogEntry wraps correctly + MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000; + for(int i = 0; i < 10000; ++i, c += 1) + { + MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK; + MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2)); + } + #endif + MICROPROFILE_SCOPE(g_MicroProfileFlip); + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + + + MicroProfileGpuFlip(); + + if(S.nToggleRunning) + { + S.nRunning = !S.nRunning; + if(!S.nRunning) + S.nPauseTicks = MP_TICK(); + S.nToggleRunning = 0; + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(pLog) + { + pLog->nStackPos = 0; + } + } + } + uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0; + if(S.nDumpFileNextFrame) + { + MicroProfileDumpToFile(); + S.nDumpFileNextFrame = 0; + S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage + } + if(S.nWebServerDataSent == (uint64_t)-1) 
+ { + MicroProfileWebServerStart(); + S.nWebServerDataSent = 0; + } + + if(MicroProfileWebServerUpdate()) + { + S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; //hide spike from dumping webpage + } + + if(S.nAutoClearFrames) + { + nAggregateClear = 1; + nAggregateFlip = 1; + S.nAutoClearFrames -= 1; + } + + + if(S.nRunning || S.nForceEnable) + { + S.nFramePutIndex++; + S.nFramePut = (S.nFramePut+1) % MICROPROFILE_MAX_FRAME_HISTORY; + MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut); + S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY; + S.nFrameCurrentIndex++; + uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY; + + uint32_t nContextSwitchPut = S.nContextSwitchPut; + if(S.nContextSwitchLastPut < nContextSwitchPut) + { + S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut); + } + else + { + S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut; + } + S.nContextSwitchLastPut = nContextSwitchPut; + + MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut]; + MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent]; + MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext]; + + pFramePut->nFrameStartCpu = MP_TICK(); + pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp(); + if(pFrameNext->nFrameStartGpu != (uint64_t)-1) + pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu); + + if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1) + pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1; + + uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; + uint64_t nFrameEndCpu = pFrameNext->nFrameStartCpu; + + { + uint64_t nTick = nFrameEndCpu - nFrameStartCpu; + S.nFlipTicks = nTick; + S.nFlipAggregate += nTick; + S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick); + } + + uint8_t* 
pTimerToGroup = &S.TimerToGroup[0]; + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(!pLog) + { + pFramePut->nLogStart[i] = 0; + } + else + { + uint32_t nPut = pLog->nPut.load(std::memory_order_acquire); + pFramePut->nLogStart[i] = nPut; + MP_ASSERT(nPut< MICROPROFILE_BUFFER_SIZE); + //need to keep last frame around to close timers. timers more than 1 frame old is ditched. + pLog->nGet.store(nPut, std::memory_order_relaxed); + } + } + + if(S.nRunning) + { + uint64_t* pFrameGroup = &S.FrameGroup[0]; + { + MICROPROFILE_SCOPE(g_MicroProfileClear); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + S.Frame[i].nTicks = 0; + S.Frame[i].nCount = 0; + S.FrameExclusive[i] = 0; + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + pFrameGroup[i] = 0; + } + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j))) + { + auto& Meta = S.MetaCounters[j]; + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + Meta.nCounters[i] = 0; + } + } + } + + } + { + MICROPROFILE_SCOPE(g_MicroProfileThreadLoop); + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(!pLog) + continue; + + uint8_t* pGroupStackPos = &pLog->nGroupStackPos[0]; + int64_t nGroupTicks[MICROPROFILE_MAX_GROUPS] = {0}; + + + uint32_t nPut = pFrameNext->nLogStart[i]; + uint32_t nGet = pFrameCurrent->nLogStart[i]; + uint32_t nRange[2][2] = { {0, 0}, {0, 0}, }; + MicroProfileGetRange(nPut, nGet, nRange); + + + //fetch gpu results. 
+ if(pLog->nGpu) + { + for(uint32_t j = 0; j < 2; ++j) + { + uint32_t nStart = nRange[j][0]; + uint32_t nEnd = nRange[j][1]; + for(uint32_t k = nStart; k < nEnd; ++k) + { + MicroProfileLogEntry L = pLog->Log[k]; + if(MicroProfileLogType(L) < MP_LOG_META) + { + pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L))); + } + } + } + } + + + uint32_t* pStack = &pLog->nStack[0]; + int64_t* pChildTickStack = &pLog->nChildTickStack[0]; + uint32_t nStackPos = pLog->nStackPos; + + for(uint32_t j = 0; j < 2; ++j) + { + uint32_t nStart = nRange[j][0]; + uint32_t nEnd = nRange[j][1]; + for(uint32_t k = nStart; k < nEnd; ++k) + { + MicroProfileLogEntry LE = pLog->Log[k]; + int nType = MicroProfileLogType(LE); + + if(MP_LOG_ENTER == nType) + { + int nTimer = MicroProfileLogTimerIndex(LE); + uint8_t nGroup = pTimerToGroup[nTimer]; + MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX); + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + pGroupStackPos[nGroup]++; + pStack[nStackPos++] = k; + pChildTickStack[nStackPos] = 0; + + } + else if(MP_LOG_META == nType) + { + if(nStackPos) + { + int64_t nMetaIndex = MicroProfileLogTimerIndex(LE); + int64_t nMetaCount = MicroProfileLogGetTick(LE); + MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX); + int64_t nCounter = MicroProfileLogTimerIndex(pLog->Log[pStack[nStackPos-1]]); + S.MetaCounters[nMetaIndex].nCounters[nCounter] += nMetaCount; + } + } + else if(MP_LOG_LEAVE == nType) + { + int nTimer = MicroProfileLogTimerIndex(LE); + uint8_t nGroup = pTimerToGroup[nTimer]; + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + if(nStackPos) + { + int64_t nTickStart = pLog->Log[pStack[nStackPos-1]]; + int64_t nTicks = MicroProfileLogTickDifference(nTickStart, LE); + int64_t nChildTicks = pChildTickStack[nStackPos]; + nStackPos--; + pChildTickStack[nStackPos] += nTicks; + + uint32_t nTimerIndex = MicroProfileLogTimerIndex(LE); + S.Frame[nTimerIndex].nTicks += nTicks; + S.FrameExclusive[nTimerIndex] += 
(nTicks-nChildTicks); + S.Frame[nTimerIndex].nCount += 1; + + MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS); + uint8_t nGroupStackPos = pGroupStackPos[nGroup]; + if(nGroupStackPos) + { + nGroupStackPos--; + if(0 == nGroupStackPos) + { + nGroupTicks[nGroup] += nTicks; + } + pGroupStackPos[nGroup] = nGroupStackPos; + } + } + } + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + pLog->nGroupTicks[i] += nGroupTicks[i]; + pFrameGroup[i] += nGroupTicks[i]; + } + pLog->nStackPos = nStackPos; + } + } + { + MICROPROFILE_SCOPE(g_MicroProfileAccumulate); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + S.AccumTimers[i].nTicks += S.Frame[i].nTicks; + S.AccumTimers[i].nCount += S.Frame[i].nCount; + S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks); + S.AccumTimersExclusive[i] += S.FrameExclusive[i]; + S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]); + } + + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + S.AccumGroup[i] += pFrameGroup[i]; + S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i]); + } + + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j))) + { + auto& Meta = S.MetaCounters[j]; + uint64_t nSum = 0;; + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint64_t nCounter = Meta.nCounters[i]; + Meta.nAccumMax[i] = MicroProfileMax(Meta.nAccumMax[i], nCounter); + Meta.nAccum[i] += nCounter; + nSum += nCounter; + } + Meta.nSumAccum += nSum; + Meta.nSumAccumMax = MicroProfileMax(Meta.nSumAccumMax, nSum); + } + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN) + { + MicroProfileToken nToken = S.Graph[i].nToken; + S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks; + } + } + S.nGraphPut = (S.nGraphPut+1) % MICROPROFILE_GRAPH_HISTORY; + + } + + + if(S.nRunning && 
S.nAggregateFlip <= ++S.nAggregateFlipCount) + { + nAggregateFlip = 1; + if(S.nAggregateFlip) // if 0 accumulate indefinitely + { + nAggregateClear = 1; + } + } + } + if(nAggregateFlip) + { + memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers); + memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers); + memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers); + memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers); + + memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup)); + memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup)); + + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(!pLog) + continue; + + memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks)); + + if(nAggregateClear) + { + memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks)); + } + } + + for(uint32_t j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName && 0 != (S.nActiveBars & (MP_DRAW_META_FIRST<<j))) + { + auto& Meta = S.MetaCounters[j]; + memcpy(&Meta.nAggregateMax[0], &Meta.nAccumMax[0], sizeof(Meta.nAggregateMax[0]) * S.nTotalTimers); + memcpy(&Meta.nAggregate[0], &Meta.nAccum[0], sizeof(Meta.nAggregate[0]) * S.nTotalTimers); + Meta.nSumAggregate = Meta.nSumAccum; + Meta.nSumAggregateMax = Meta.nSumAccumMax; + if(nAggregateClear) + { + memset(&Meta.nAccumMax[0], 0, sizeof(Meta.nAccumMax[0]) * S.nTotalTimers); + memset(&Meta.nAccum[0], 0, sizeof(Meta.nAccum[0]) * S.nTotalTimers); + Meta.nSumAccum = 0; + Meta.nSumAccumMax = 0; + } + } + } + + + + + + S.nAggregateFrames = S.nAggregateFlipCount; + S.nFlipAggregateDisplay = S.nFlipAggregate; + S.nFlipMaxDisplay = S.nFlipMax; + if(nAggregateClear) + { + memset(&S.AccumTimers[0], 0, 
sizeof(S.Aggregate[0]) * S.nTotalTimers); + memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers); + memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers); + memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers); + memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup)); + memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup)); + + S.nAggregateFlipCount = 0; + S.nFlipAggregate = 0; + S.nFlipMax = 0; + + S.nAggregateFlipTick = MP_TICK(); + } + } + S.nAggregateClear = 0; + + uint64_t nNewActiveGroup = 0; + if(S.nForceEnable || (S.nDisplay && S.nRunning)) + nNewActiveGroup = S.nAllGroupsWanted ? S.nGroupMask : S.nActiveGroupWanted; + nNewActiveGroup |= S.nForceGroup; + nNewActiveGroup |= S.nForceGroupUI; + if(S.nActiveGroup != nNewActiveGroup) + S.nActiveGroup = nNewActiveGroup; + uint32_t nNewActiveBars = 0; + if(S.nDisplay && S.nRunning) + nNewActiveBars = S.nBars; + if(S.nForceMetaCounters) + { + for(int i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName) + { + nNewActiveBars |= (MP_DRAW_META_FIRST<<i); + } + } + } + if(nNewActiveBars != S.nActiveBars) + S.nActiveBars = nNewActiveBars; +} + +void MicroProfileSetForceEnable(bool bEnable) +{ + S.nForceEnable = bEnable ? 1 : 0; +} +bool MicroProfileGetForceEnable() +{ + return S.nForceEnable != 0; +} + +void MicroProfileSetEnableAllGroups(bool bEnableAllGroups) +{ + S.nAllGroupsWanted = bEnableAllGroups ? 
1 : 0; +} + +void MicroProfileEnableCategory(const char* pCategory, bool bEnabled) +{ + int nCategoryIndex = -1; + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName)) + { + nCategoryIndex = (int)i; + break; + } + } + if(nCategoryIndex >= 0) + { + if(bEnabled) + { + S.nActiveGroupWanted |= S.CategoryInfo[nCategoryIndex].nGroupMask; + } + else + { + S.nActiveGroupWanted &= ~S.CategoryInfo[nCategoryIndex].nGroupMask; + } + } +} + + +void MicroProfileEnableCategory(const char* pCategory) +{ + MicroProfileEnableCategory(pCategory, true); +} +void MicroProfileDisableCategory(const char* pCategory) +{ + MicroProfileEnableCategory(pCategory, false); +} + +bool MicroProfileGetEnableAllGroups() +{ + return 0 != S.nAllGroupsWanted; +} + +void MicroProfileSetForceMetaCounters(bool bForce) +{ + S.nForceMetaCounters = bForce ? 1 : 0; +} + +bool MicroProfileGetForceMetaCounters() +{ + return 0 != S.nForceMetaCounters; +} + +void MicroProfileEnableMetaCounter(const char* pMeta) +{ + for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta)) + { + S.nBars |= (MP_DRAW_META_FIRST<<i); + return; + } + } +} +void MicroProfileDisableMetaCounter(const char* pMeta) +{ + for(uint32_t i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName && 0 == MP_STRCASECMP(S.MetaCounters[i].pName, pMeta)) + { + S.nBars &= ~(MP_DRAW_META_FIRST<<i); + return; + } + } +} + + +void MicroProfileSetAggregateFrames(int nFrames) +{ + S.nAggregateFlip = (uint32_t)nFrames; + if(0 == nFrames) + { + S.nAggregateClear = 1; + } +} + +int MicroProfileGetAggregateFrames() +{ + return S.nAggregateFlip; +} + +int MicroProfileGetCurrentAggregateFrames() +{ + return int(S.nAggregateFlip ? 
S.nAggregateFlip : S.nAggregateFlipCount); +} + + +void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type) +{ + MicroProfileInit(); + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + uint16_t nGroup = MicroProfileGetGroup(pGroup, Type); + S.nForceGroup |= (1ll << nGroup); +} + +void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type) +{ + MicroProfileInit(); + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + uint16_t nGroup = MicroProfileGetGroup(pGroup, Type); + S.nForceGroup &= ~(1ll << nGroup); +} + + +void MicroProfileCalcAllTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize) +{ + for(uint32_t i = 0; i < S.nTotalTimers && i < nSize; ++i) + { + const uint32_t nGroupId = S.TimerInfo[i].nGroupIndex; + const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + uint32_t nTimer = i; + uint32_t nIdx = i * 2; + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? 
S.Aggregate[nTimer].nCount : 1; + float fToPrc = S.fRcpReferenceTime; + float fMs = fToMs * (S.Frame[nTimer].nTicks); + float fPrc = MicroProfileMin(fMs * fToPrc, 1.f); + float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames); + float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f); + float fMaxMs = fToMs * (S.AggregateMax[nTimer]); + float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f); + float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount); + float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f); + float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]); + float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f); + float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames); + float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f); + float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]); + float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f); + float fTotalMs = fToMs * S.Aggregate[nTimer].nTicks; + pTimers[nIdx] = fMs; + pTimers[nIdx+1] = fPrc; + pAverage[nIdx] = fAverageMs; + pAverage[nIdx+1] = fAveragePrc; + pMax[nIdx] = fMaxMs; + pMax[nIdx+1] = fMaxPrc; + pCallAverage[nIdx] = fCallAverageMs; + pCallAverage[nIdx+1] = fCallAveragePrc; + pExclusive[nIdx] = fMsExclusive; + pExclusive[nIdx+1] = fPrcExclusive; + pAverageExclusive[nIdx] = fAverageMsExclusive; + pAverageExclusive[nIdx+1] = fAveragePrcExclusive; + pMaxExclusive[nIdx] = fMaxMsExclusive; + pMaxExclusive[nIdx+1] = fMaxPrcExclusive; + pTotal[nIdx] = fTotalMs; + pTotal[nIdx+1] = 0.f; + } +} + +void MicroProfileTogglePause() +{ + S.nToggleRunning = 1; +} + +float MicroProfileGetTime(const char* pGroup, const char* pName) +{ + MicroProfileToken nToken = MicroProfileFindToken(pGroup, pName); + if(nToken == MICROPROFILE_INVALID_TOKEN) + { + return 0.f; + } + uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken); + uint32_t nGroupIndex = 
MicroProfileGetGroupIndex(nToken); + float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + return S.Frame[nTimerIndex].nTicks * fToMs; +} + + +void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu) +{ + MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch); + uint32_t nContextSwitchPut = S.nContextSwitchPut; + uint64_t nContextSwitchStart, nContextSwitchEnd; + nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu()); + int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu()); + for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i) + { + uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i+1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; + MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex]; + if(CS.nTicks > nSearchEnd) + { + nContextSwitchEnd = nIndex; + } + if(CS.nTicks > nSearchBegin) + { + nContextSwitchStart = nIndex; + } + } + *pContextSwitchStart = nContextSwitchStart; + *pContextSwitchEnd = nContextSwitchEnd; +} + + + +#if MICROPROFILE_WEBSERVER + +#define MICROPROFILE_EMBED_HTML + +extern const char* g_MicroProfileHtml_begin[]; +extern size_t g_MicroProfileHtml_begin_sizes[]; +extern size_t g_MicroProfileHtml_begin_count; +extern const char* g_MicroProfileHtml_end[]; +extern size_t g_MicroProfileHtml_end_sizes[]; +extern size_t g_MicroProfileHtml_end_count; + +typedef void MicroProfileWriteCallback(void* Handle, size_t size, const char* pData); + +uint32_t MicroProfileWebServerPort() +{ + return S.nWebServerPort; +} + +void MicroProfileDumpFile(const char* pHtml, 
const char* pCsv) +{ + S.nDumpFileNextFrame = 0; + if(pHtml) + { + uint32_t nLen = strlen(pHtml); + if(nLen > sizeof(S.HtmlDumpPath)-1) + { + return; + } + memcpy(S.HtmlDumpPath, pHtml, nLen+1); + S.nDumpFileNextFrame |= 1; + } + if(pCsv) + { + uint32_t nLen = strlen(pCsv); + if(nLen > sizeof(S.CsvDumpPath)-1) + { + return; + } + memcpy(S.CsvDumpPath, pCsv, nLen+1); + S.nDumpFileNextFrame |= 2; + } +} + +void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...) +{ + char buffer[32*1024]; + va_list args; + va_start (args, pFmt); +#ifdef _WIN32 + size_t size = vsprintf_s(buffer, pFmt, args); +#else + size_t size = vsnprintf(buffer, sizeof(buffer)-1, pFmt, args); +#endif + CB(Handle, size, &buffer[0]); + va_end (args); +} + +#define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__) +void MicroProfileDumpCsv(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames) +{ + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()); + + printf("frames,%d\n", nAggregateFrames); + printf("group,name,average,max,callaverage\n"); + + uint32_t nNumTimers = S.nTotalTimers; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + float* pTotal = pTimers + 7 * nBlockSize; + + MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers); + + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nIdx = i * 2; + printf("\"%s\",\"%s\",%f,%f,%f\n", 
S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]); + } + + printf("\n\n"); + + printf("group,average,max,total\n"); + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + const char* pGroupName = S.GroupInfo[j].pName; + float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU; + if(pGroupName[0] != '\0') + { + printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroup[j]); + } + } + + printf("\n\n"); + printf("group,thread,average,total\n"); + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + const char* pThreadName = &S.Pool[i]->ThreadName[0]; + // MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i); + float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU; + { + uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j]; + float fTime = nTicks / nAggregateFrames * fToMs; + float fTimeTotal = nTicks * fToMs; + if(fTimeTotal > 0.01f) + { + const char* pGroupName = S.GroupInfo[j].pName; + printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal); + } + } + } + } + } + + printf("\n\n"); + printf("frametimecpu\n"); + + const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3; + const uint32_t nStart = S.nFrameCurrent; + for(uint32_t i = nCount; i > 0; i--) + { + uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu; + printf("%f,", nTicks * fToMsCPU); + } + printf("\n"); + + printf("\n\n"); + printf("frametimegpu\n"); + + for(uint32_t i = nCount; i > 0; i--) + { + uint32_t nFrame = (nStart + 
MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu; + printf("%f,", nTicks * fToMsGPU); + } + printf("\n\n"); + printf("Meta\n");//only single frame snapshot + printf("name,average,max,total\n"); + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + printf("\"%s\",%f,%lld,%lld\n",S.MetaCounters[j].pName, S.MetaCounters[j].nSumAggregate / (float)nAggregateFrames, S.MetaCounters[j].nSumAggregateMax,S.MetaCounters[j].nSumAggregate); + } + } +} +#undef printf + +void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, int nMaxFrames, const char* pHost) +{ + uint32_t nRunning = S.nRunning; + S.nRunning = 0; + //stall pushing of timers + uint64_t nActiveGroup = S.nActiveGroup; + S.nActiveGroup = 0; + S.nPauseTicks = MP_TICK(); + + + for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i) + { + CB(Handle, g_MicroProfileHtml_begin_sizes[i]-1, g_MicroProfileHtml_begin[i]); + } + //dump info + uint64_t nTicks = MP_TICK(); + + float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()); + float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick); + MicroProfilePrintf(CB, Handle, "var DumpHost = '%s';\n", pHost ? 
pHost : ""); + time_t CaptureTime; + time(&CaptureTime); + MicroProfilePrintf(CB, Handle, "var DumpUtcCaptureTime = %ld;\n", CaptureTime); + MicroProfilePrintf(CB, Handle, "var AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs); + + //categories + MicroProfilePrintf(CB, Handle, "var CategoryInfo = Array(%d);\n",S.nCategoryCount); + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + MicroProfilePrintf(CB, Handle, "CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName); + } + + //groups + MicroProfilePrintf(CB, Handle, "var GroupInfo = Array(%d);\n\n",S.nGroupCount); + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + float fRcpAggregateFrames = 1.f / nAggregateFrames; + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + MP_ASSERT(i == S.GroupInfo[i].nGroupIndex); + float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU; + MicroProfilePrintf(CB, Handle, "GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '#%02x%02x%02x');\n", + S.GroupInfo[i].nGroupIndex, + S.GroupInfo[i].nGroupIndex, + S.GroupInfo[i].pName, + S.GroupInfo[i].nCategory, + S.GroupInfo[i].nNumTimers, + S.GroupInfo[i].Type == MicroProfileTokenTypeGpu?1:0, + fToMs * S.AggregateGroup[i], + fToMs * S.AggregateGroup[i] / nAggregateFrames, + fToMs * S.AggregateGroupMax[i], + MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff, + MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff, + MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff); + } + //timers + + uint32_t nNumTimers = S.nTotalTimers; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 8 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + float* pTotal = 
pTimers + 7 * nBlockSize; + + MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers); + + MicroProfilePrintf(CB, Handle, "\nvar TimerInfo = Array(%d);\n\n", S.nTotalTimers); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nIdx = i * 2; + MP_ASSERT(i == S.TimerInfo[i].nTimerIndex); + MicroProfilePrintf(CB, Handle, "var Meta%d = [", i); + bool bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + uint32_t lala = S.MetaCounters[j].nCounters[i]; + MicroProfilePrintf(CB, Handle, bOnce ? "%d" : ",%d", lala); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var MetaAvg%d = [", i); + bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + MicroProfilePrintf(CB, Handle, bOnce ? "%f" : ",%f", fRcpAggregateFrames * S.MetaCounters[j].nAggregate[i]); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var MetaMax%d = [", i); + bOnce = true; + for(int j = 0; j < MICROPROFILE_META_MAX; ++j) + { + if(S.MetaCounters[j].pName) + { + MicroProfilePrintf(CB, Handle, bOnce ? 
"%d" : ",%d", S.MetaCounters[j].nAggregateMax[i]); + bOnce = false; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + uint32_t nColor = S.TimerInfo[i].nColor; + uint32_t nColorDark = (nColor >> 1) & ~0x80808080; + MicroProfilePrintf(CB, Handle, "TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %d, %f, Meta%d, MetaAvg%d, MetaMax%d);\n", S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].nTimerIndex, S.TimerInfo[i].pName, S.TimerInfo[i].nGroupIndex, + MICROPROFILE_UNPACK_RED(nColor) & 0xff, + MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, + MICROPROFILE_UNPACK_BLUE(nColor) & 0xff, + MICROPROFILE_UNPACK_RED(nColorDark) & 0xff, + MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff, + MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff, + pAverage[nIdx], + pMax[nIdx], + pAverageExclusive[nIdx], + pMaxExclusive[nIdx], + pCallAverage[nIdx], + S.Aggregate[i].nCount, + pTotal[nIdx], + i,i,i); + + } + + MicroProfilePrintf(CB, Handle, "\nvar ThreadNames = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName); + } + else + { + MicroProfilePrintf(CB, Handle, "'Thread %d',", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n\n"); + + + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "var ThreadGroupTime%d = [", i); + float fToMs = S.Pool[i]->nGpu ? 
fToMsGPU : fToMsCPU; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j]/nAggregateFrames * fToMs); + } + MicroProfilePrintf(CB, Handle, "];\n"); + } + } + MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeArray = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "ThreadGroupTime%d,", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "var ThreadGroupTimeTotal%d = [", i); + float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs); + } + MicroProfilePrintf(CB, Handle, "];\n"); + } + } + MicroProfilePrintf(CB, Handle, "\nvar ThreadGroupTimeTotalArray = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + MicroProfilePrintf(CB, Handle, "ThreadGroupTimeTotal%d,", i); + } + } + MicroProfilePrintf(CB, Handle, "];"); + + + + + MicroProfilePrintf(CB, Handle, "\nvar ThreadIds = ["); + for(uint32_t i = 0; i < S.nNumLogs; ++i) + { + if(S.Pool[i]) + { + ThreadIdType ThreadId = S.Pool[i]->nThreadId; + if(!ThreadId) + { + ThreadId = (ThreadIdType)-1; + } + MicroProfilePrintf(CB, Handle, "%d,", ThreadId); + } + else + { + MicroProfilePrintf(CB, Handle, "-1,", i); + } + } + MicroProfilePrintf(CB, Handle, "];\n\n"); + + MicroProfilePrintf(CB, Handle, "\nvar MetaNames = ["); + for(int i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName) + { + MicroProfilePrintf(CB, Handle, "'%s',", S.MetaCounters[i].pName); + } + } + + + MicroProfilePrintf(CB, Handle, "];\n\n"); + + + + uint32_t nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); //leave a few to not overwrite + nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames); + + + 
uint32_t nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY; + MP_ASSERT(nLastFrame == (S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY)); + MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY); + MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY); + const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu; + const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu; + int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu; + + int64_t nTickReferenceCpu, nTickReferenceGpu; + int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu(); + int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu(); + int nTickReference = 0; + if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu)) + { + nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu; + nTickReference = 1; + } + + +#if MICROPROFILE_DEBUG + printf("dumping %d frames\n", nNumFrames); + printf("dumping frame %d to %d\n", nFirstFrame, nLastFrame); +#endif + + + uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers); + memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers); + + MicroProfilePrintf(CB, Handle, "var Frames = Array(%d);\n", nNumFrames); + for(uint32_t i = 0; i < nNumFrames; ++i) + { + uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY; + uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY; + + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfileThreadLog* pLog = S.Pool[j]; + int64_t nStartTickBase = pLog->nGpu ? 
nTickStartGpu : nTickStart; + uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j]; + uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j]; + + float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu); + float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu); + MicroProfilePrintf(CB, Handle, "var ts_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase; + int64_t nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase; + float fTime = nLogType == MP_LOG_META ? 0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs; + MicroProfilePrintf(CB, Handle, "%f", fTime); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + float fToMs = nLogType == MP_LOG_GPU_EXTRA ? fToMsCpu : fToMsBase; + nStartTick = nLogType == MP_LOG_GPU_EXTRA ? nTickStart : nStartTickBase; + float fTime = nLogType == MP_LOG_META ? 
0.f : MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs; + MicroProfilePrintf(CB, Handle, ",%f", fTime); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var tt_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + MicroProfilePrintf(CB, Handle, "%d", MicroProfileLogType(pLog->Log[k])); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nLogType = MicroProfileLogType(pLog->Log[k]); + if(nLogType == MP_LOG_META) + { + //for meta, store the count + 3, which is the tick part + nLogType = 3 + MicroProfileLogGetTick(pLog->Log[k]); + } + MicroProfilePrintf(CB, Handle, ",%d", nLogType); + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + MicroProfilePrintf(CB, Handle, "var ti_%d_%d = [", i, j); + if(nLogStart != nLogEnd) + { + uint32_t k = nLogStart; + MicroProfilePrintf(CB, Handle, "%d", (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k])); + for(k = (k+1) % MICROPROFILE_BUFFER_SIZE; k != nLogEnd; k = (k+1) % MICROPROFILE_BUFFER_SIZE) + { + uint32_t nTimerIndex = (uint32_t)MicroProfileLogTimerIndex(pLog->Log[k]); + MicroProfilePrintf(CB, Handle, ",%d", nTimerIndex); + nTimerCounter[nTimerIndex]++; + } + } + MicroProfilePrintf(CB, Handle, "];\n"); + + } + + MicroProfilePrintf(CB, Handle, "var ts%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "ts_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var tt%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "tt_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + + MicroProfilePrintf(CB, Handle, "var ti%d = [", i); + for(uint32_t j = 0; j < S.nNumLogs; ++j) + { + MicroProfilePrintf(CB, Handle, "ti_%d_%d,", i, j); + } + MicroProfilePrintf(CB, Handle, "];\n"); + + + int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu; + int64_t nFrameEnd = 
S.Frames[nFrameIndexNext].nFrameStartCpu; + + float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu); + float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs; + float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs; + float fFrameGpuMs = 0; + float fFrameGpuEndMs = 0; + if(nTickReference) + { + fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU; + fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU; + } + MicroProfilePrintf(CB, Handle, "Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, ts%d, tt%d, ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i); + } + + uint32_t nContextSwitchStart = 0; + uint32_t nContextSwitchEnd = 0; + MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd); + + uint32_t nWrittenBefore = S.nWebServerDataSent; + MicroProfilePrintf(CB, Handle, "var CSwitchThreadInOutCpu = ["); + for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MicroProfileContextSwitch CS = S.ContextSwitch[j]; + int nCpu = CS.nCpu; + MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu); + } + MicroProfilePrintf(CB, Handle, "];\n"); + MicroProfilePrintf(CB, Handle, "var CSwitchTime = ["); + float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MicroProfileContextSwitch CS = S.ContextSwitch[j]; + float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu; + MicroProfilePrintf(CB, Handle, "%f,", fTime); + } + MicroProfilePrintf(CB, Handle, "];\n"); + uint32_t nWrittenAfter = S.nWebServerDataSent; + MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - 
nWrittenBefore); + + + for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i) + { + CB(Handle, g_MicroProfileHtml_end_sizes[i]-1, g_MicroProfileHtml_end[i]); + } + + uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount); + + memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex; + nGroupCounter[nGroupIndex] += nTimerCounter[i]; + } + + uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nGroupCount); + uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t)* S.nTotalTimers); + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + nGroupCounterSort[i] = i; + } + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + nTimerCounterSort[i] = i; + } + std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount, + [nGroupCounter](const uint32_t l, const uint32_t r) + { + return nGroupCounter[l] > nGroupCounter[r]; + } + ); + + std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers, + [nTimerCounter](const uint32_t l, const uint32_t r) + { + return nTimerCounter[l] > nTimerCounter[r]; + } + ); + + MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n"); + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + uint32_t idx = nGroupCounterSort[i]; + MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName); + } + MicroProfilePrintf(CB, Handle, "Marker Per Timer\n"); + for(uint32_t i = 0; i < S.nTotalTimers; ++i) + { + uint32_t idx = nTimerCounterSort[i]; + MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName); + } + MicroProfilePrintf(CB, Handle, "\n-->\n"); + + S.nActiveGroup = nActiveGroup; + S.nRunning = nRunning; + +#if MICROPROFILE_DEBUG + int64_t nTicksEnd = MP_TICK(); + float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks); + printf("html dump took %6.2fms\n", fMs); +#endif + + +} + +void 
MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData) +{ + fwrite(pData, nSize, 1, (FILE*)Handle); +} + +void MicroProfileDumpToFile() +{ + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + if(S.nDumpFileNextFrame&1) + { + FILE* F = fopen(S.HtmlDumpPath, "w"); + if(F) + { + MicroProfileDumpHtml(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES, S.HtmlDumpPath); + fclose(F); + } + } + if(S.nDumpFileNextFrame&2) + { + FILE* F = fopen(S.CsvDumpPath, "w"); + if(F) + { + MicroProfileDumpCsv(MicroProfileWriteFile, F, MICROPROFILE_WEBSERVER_MAXFRAMES); + fclose(F); + } + } +} + +void MicroProfileFlushSocket(MpSocket Socket) +{ + send(Socket, &S.WebServerBuffer[0], S.WebServerPut, 0); + S.WebServerPut = 0; + +} + +void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData) +{ + S.nWebServerDataSent += nSize; + MpSocket Socket = *(MpSocket*)Handle; + if(nSize > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2) + { + MicroProfileFlushSocket(Socket); + send(Socket, pData, nSize, 0); + + } + else + { + memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize); + S.WebServerPut += nSize; + if(S.WebServerPut > MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE/2) + { + MicroProfileFlushSocket(Socket); + } + } +} + +#if MICROPROFILE_MINIZ +#ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE +#define MICROPROFILE_COMPRESS_BUFFER_SIZE (256<<10) +#endif + +#define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE/2) +struct MicroProfileCompressedSocketState +{ + unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK]; + unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + mz_stream Stream; + MpSocket Socket; + uint32_t nSize; + uint32_t nCompressedSize; + uint32_t nFlushes; + uint32_t nMemmoveBytes; +}; + +void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState) +{ + mz_stream& Stream = pState->Stream; + unsigned char* pSendStart = &pState->DeflateOut[0]; + unsigned char* pSendEnd = 
&pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Stream.avail_out]; + if(pSendStart != pSendEnd) + { + send(pState->Socket, (const char*)pSendStart, pSendEnd - pSendStart, 0); + pState->nCompressedSize += pSendEnd - pSendStart; + } + Stream.next_out = &pState->DeflateOut[0]; + Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK; + +} +void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket) +{ + mz_stream& Stream = pState->Stream; + memset(&Stream, 0, sizeof(Stream)); + Stream.next_out = &pState->DeflateOut[0]; + Stream.avail_out = MICROPROFILE_COMPRESS_CHUNK; + Stream.next_in = &pState->DeflateIn[0]; + Stream.avail_in = 0; + mz_deflateInit(&Stream, Z_DEFAULT_COMPRESSION); + pState->Socket = Socket; + pState->nSize = 0; + pState->nCompressedSize = 0; + pState->nFlushes = 0; + pState->nMemmoveBytes = 0; + +} +void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState) +{ + mz_stream& Stream = pState->Stream; + MicroProfileCompressedSocketFlush(pState); + int r = mz_deflate(&Stream, MZ_FINISH); + MP_ASSERT(r == MZ_STREAM_END); + MicroProfileCompressedSocketFlush(pState); + r = mz_deflateEnd(&Stream); + MP_ASSERT(r == MZ_OK); +} + +void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData) +{ + MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle; + mz_stream& Stream = pState->Stream; + const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in; + const unsigned char* pDeflateInStart = &pState->DeflateIn[0]; + const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + pState->nSize += nSize; + if(nSize <= pDeflateInRealEnd - pDeflateInEnd) + { + memcpy((void*)pDeflateInEnd, pData, nSize); + Stream.avail_in += nSize; + MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd); + return; + } + int Flush = 0; + while(nSize) + { + pDeflateInEnd = Stream.next_in + Stream.avail_in; + if(Flush) + { + 
pState->nFlushes++; + MicroProfileCompressedSocketFlush(pState); + pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK]; + if(pDeflateInEnd == pDeflateInRealEnd) + { + if(Stream.avail_in) + { + MP_ASSERT(pDeflateInStart != Stream.next_in); + memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in); + pState->nMemmoveBytes += Stream.avail_in; + } + Stream.next_in = pDeflateInStart; + pDeflateInEnd = Stream.next_in + Stream.avail_in; + } + } + size_t nSpace = pDeflateInRealEnd - pDeflateInEnd; + size_t nBytes = MicroProfileMin(nSpace, nSize); + MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd); + memcpy((void*)pDeflateInEnd, pData, nBytes); + Stream.avail_in += nBytes; + nSize -= nBytes; + pData += nBytes; + int r = mz_deflate(&Stream, MZ_NO_FLUSH); + Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0; + MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK); + if(r == MZ_BUF_ERROR) + { + r = mz_deflate(&Stream, MZ_SYNC_FLUSH); + } + } +} +#endif + + +#ifndef MicroProfileSetNonBlocking //fcntl doesnt work on a some unix like platforms.. +void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking) +{ +#ifdef _WIN32 + u_long nonBlocking = NonBlocking ? 
1 : 0; + ioctlsocket(Socket, FIONBIO, &nonBlocking); +#else + int Options = fcntl(Socket, F_GETFL); + if(NonBlocking) + { + fcntl(Socket, F_SETFL, Options|O_NONBLOCK); + } + else + { + fcntl(Socket, F_SETFL, Options&(~O_NONBLOCK)); + } +#endif +} +#endif + +void MicroProfileWebServerStart() +{ +#ifdef _WIN32 + WSADATA wsa; + if(WSAStartup(MAKEWORD(2, 2), &wsa)) + { + S.ListenerSocket = -1; + return; + } +#endif + + S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6); + MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket)); + MicroProfileSetNonBlocking(S.ListenerSocket, 1); + + S.nWebServerPort = (uint32_t)-1; + struct sockaddr_in Addr; + Addr.sin_family = AF_INET; + Addr.sin_addr.s_addr = INADDR_ANY; + for(int i = 0; i < 20; ++i) + { + Addr.sin_port = htons(MICROPROFILE_WEBSERVER_PORT+i); + if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr))) + { + S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT+i; + break; + } + } + listen(S.ListenerSocket, 8); +} + +void MicroProfileWebServerStop() +{ +#ifdef _WIN32 + closesocket(S.ListenerSocket); + WSACleanup(); +#else + close(S.ListenerSocket); +#endif +} + +int MicroProfileParseGet(const char* pGet) +{ + const char* pStart = pGet; + while(*pGet != '\0') + { + if(*pGet < '0' || *pGet > '9') + return 0; + pGet++; + } + int nFrames = atoi(pStart); + if(nFrames) + { + return nFrames; + } + else + { + return MICROPROFILE_WEBSERVER_MAXFRAMES; + } +} +bool MicroProfileWebServerUpdate() +{ + MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", -1); + MpSocket Connection = accept(S.ListenerSocket, 0, 0); + bool bServed = false; + if(!MP_INVALID_SOCKET(Connection)) + { + std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex()); + char Req[8192]; + MicroProfileSetNonBlocking(Connection, 0); + int nReceived = recv(Connection, Req, sizeof(Req)-1, 0); + if(nReceived > 0) + { + Req[nReceived] = '\0'; +#if MICROPROFILE_MINIZ +#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: 
deflate\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n" +#else +#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nExpires: Tue, 01 Jan 2199 16:00:00 GMT\r\n\r\n" +#endif + char* pHttp = strstr(Req, "HTTP/"); + char* pGet = strstr(Req, "GET /"); + char* pHost = strstr(Req, "Host: "); + auto Terminate = [](char* pString) + { + char* pEnd = pString; + while(*pEnd != '\0') + { + if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ') + { + *pEnd = '\0'; + return; + } + pEnd++; + } + }; + if(pHost) + { + pHost += sizeof("Host: ")-1; + Terminate(pHost); + } + + if(pHttp && pGet) + { + *pHttp = '\0'; + pGet += sizeof("GET /")-1; + Terminate(pGet); + int nFrames = MicroProfileParseGet(pGet); + if(nFrames) + { + uint64_t nTickStart = MP_TICK(); + send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER)-1, 0); + uint64_t nDataStart = S.nWebServerDataSent; + S.WebServerPut = 0; + #if 0 == MICROPROFILE_MINIZ + MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, nFrames, pHost); + uint64_t nDataEnd = S.nWebServerDataSent; + uint64_t nTickEnd = MP_TICK(); + uint64_t nDiff = (nTickEnd - nTickStart); + float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff; + int nKb = ((nDataEnd-nDataStart)>>10) + 1; + int nCompressedKb = nKb; + MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs); + MicroProfileFlushSocket(Connection); + #else + MicroProfileCompressedSocketState CompressState; + MicroProfileCompressedSocketStart(&CompressState, Connection); + MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, nFrames, pHost); + S.nWebServerDataSent += CompressState.nSize; + uint64_t nDataEnd = S.nWebServerDataSent; + uint64_t nTickEnd = MP_TICK(); + uint64_t nDiff = (nTickEnd - nTickStart); + float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff; + int nKb = ((nDataEnd-nDataStart)>>10) + 1; + int nCompressedKb = 
((CompressState.nCompressedSize)>>10) + 1; + MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs); + MicroProfileCompressedSocketFinish(&CompressState); + MicroProfileFlushSocket(Connection); + #endif + + #if MICROPROFILE_DEBUG + printf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs); + #endif + } + } + } +#ifdef _WIN32 + closesocket(Connection); +#else + close(Connection); +#endif + } + return bServed; +} +#endif + + + + +#if MICROPROFILE_CONTEXT_SWITCH_TRACE +//functions that need to be implemented per platform. +void* MicroProfileTraceThread(void* unused); +bool MicroProfileIsLocalThread(uint32_t nThreadId); + + +void MicroProfileStartContextSwitchTrace() +{ + if(!S.bContextSwitchRunning) + { + S.bContextSwitchRunning = true; + S.bContextSwitchStop = false; + MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread); + } +} + +void MicroProfileStopContextSwitchTrace() +{ + if(S.bContextSwitchRunning) + { + S.bContextSwitchStop = true; + MicroProfileThreadJoin(&S.ContextSwitchThread); + } +} + + +#ifdef _WIN32 +#define INITGUID +#include <evntrace.h> +#include <evntcons.h> +#include <strsafe.h> + + +static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c }; + +struct MicroProfileSCSwitch +{ + uint32_t NewThreadId; + uint32_t OldThreadId; + int8_t NewThreadPriority; + int8_t OldThreadPriority; + uint8_t PreviousCState; + int8_t SpareByte; + int8_t OldThreadWaitReason; + int8_t OldThreadWaitMode; + int8_t OldThreadState; + int8_t OldThreadWaitIdealProcessor; + uint32_t NewThreadWaitTime; + uint32_t Reserved; +}; + + +VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent) +{ + if (pEvent->Header.Guid == g_MicroProfileThreadClassGuid) + { + if (pEvent->Header.Class.Type == 36) + { + MicroProfileSCSwitch* pCSwitch = (MicroProfileSCSwitch*) 
pEvent->MofData; + if ((pCSwitch->NewThreadId != 0) || (pCSwitch->OldThreadId != 0)) + { + MicroProfileContextSwitch Switch; + Switch.nThreadOut = pCSwitch->OldThreadId; + Switch.nThreadIn = pCSwitch->NewThreadId; + Switch.nCpu = pEvent->BufferContext.ProcessorNumber; + Switch.nTicks = pEvent->Header.TimeStamp.QuadPart; + MicroProfileContextSwitchPut(&Switch); + } + } + } +} + +ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILE Buffer) +{ + return (S.bContextSwitchStop || !S.bContextSwitchRunning) ? FALSE : TRUE; +} + + +struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES +{ + char dummy[sizeof(KERNEL_LOGGER_NAME)]; +}; + +void MicroProfileContextSwitchShutdownTrace() +{ + TRACEHANDLE SessionHandle = 0; + MicroProfileKernelTraceProperties sessionProperties; + + ZeroMemory(&sessionProperties, sizeof(sessionProperties)); + sessionProperties.Wnode.BufferSize = sizeof(sessionProperties); + sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID; + sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution + sessionProperties.Wnode.Guid = SystemTraceControlGuid; + sessionProperties.BufferSize = 1; + sessionProperties.NumberOfBuffers = 128; + sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH; + sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + sessionProperties.MaximumFileSize = 0; + sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + sessionProperties.LogFileNameOffset = 0; + + EVENT_TRACE_LOGFILE log; + ZeroMemory(&log, sizeof(log)); + log.LoggerName = KERNEL_LOGGER_NAME; + log.ProcessTraceMode = 0; + TRACEHANDLE hLog = OpenTrace(&log); + if (hLog) + { + ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP); + } + CloseTrace(hLog); + + +} + +void* MicroProfileTraceThread(void* unused) +{ + + MicroProfileContextSwitchShutdownTrace(); + ULONG status = ERROR_SUCCESS; + TRACEHANDLE SessionHandle = 0; + MicroProfileKernelTraceProperties sessionProperties; + + 
ZeroMemory(&sessionProperties, sizeof(sessionProperties)); + sessionProperties.Wnode.BufferSize = sizeof(sessionProperties); + sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID; + sessionProperties.Wnode.ClientContext = 1; //QPC clock resolution + sessionProperties.Wnode.Guid = SystemTraceControlGuid; + sessionProperties.BufferSize = 1; + sessionProperties.NumberOfBuffers = 128; + sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH|EVENT_TRACE_FLAG_PROCESS; + sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + sessionProperties.MaximumFileSize = 0; + sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); + sessionProperties.LogFileNameOffset = 0; + + + status = StartTrace((PTRACEHANDLE) &SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties); + + if (ERROR_SUCCESS != status) + { + S.bContextSwitchRunning = false; + return 0; + } + + EVENT_TRACE_LOGFILE log; + ZeroMemory(&log, sizeof(log)); + + log.LoggerName = KERNEL_LOGGER_NAME; + log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP; + log.EventCallback = MicroProfileContextSwitchCallback; + log.BufferCallback = MicroProfileBufferCallback; + + TRACEHANDLE hLog = OpenTrace(&log); + ProcessTrace(&hLog, 1, 0, 0); + CloseTrace(hLog); + MicroProfileContextSwitchShutdownTrace(); + + S.bContextSwitchRunning = false; + return 0; +} + +bool MicroProfileIsLocalThread(uint32_t nThreadId) +{ + HANDLE h = OpenThread(THREAD_QUERY_LIMITED_INFORMATION, FALSE, nThreadId); + if(h == NULL) + return false; + DWORD hProcess = GetProcessIdOfThread(h); + CloseHandle(h); + return GetCurrentProcessId() == hProcess; +} + +#elif defined(__APPLE__) +#include <sys/time.h> +void* MicroProfileTraceThread(void* unused) +{ + FILE* pFile = fopen("mypipe", "r"); + if(!pFile) + { + printf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n"); + S.bContextSwitchRunning = false; + return 0; + } + printf("STARTING TRACE THREAD\n"); + char* pLine = 0; + size_t cap = 0; 
+ size_t len = 0; + struct timeval tv; + + gettimeofday(&tv, NULL); + + uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000; + uint64_t nTickEpoch = MP_TICK(); + uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = {0}; + mach_timebase_info_data_t sTimebaseInfo; + mach_timebase_info(&sTimebaseInfo); + S.bContextSwitchRunning = true; + + uint64_t nProcessed = 0; + uint64_t nProcessedLast = 0; + while((len = getline(&pLine, &cap, pFile))>0 && !S.bContextSwitchStop) + { + nProcessed += len; + if(nProcessed - nProcessedLast > 10<<10) + { + nProcessedLast = nProcessed; + printf("processed %llukb %llukb\n", (nProcessed-nProcessedLast)>>10,nProcessed >>10); + } + + char* pX = strchr(pLine, 'X'); + if(pX) + { + int cpu = atoi(pX+1); + char* pX2 = strchr(pX + 1, 'X'); + char* pX3 = strchr(pX2 + 1, 'X'); + int thread = atoi(pX2+1); + char* lala; + int64_t timestamp = strtoll(pX3 + 1, &lala, 10); + MicroProfileContextSwitch Switch; + + //convert to ticks. 
+ uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch; + uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom; + uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch; + if(cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS) + { + Switch.nThreadOut = nLastThread[cpu]; + Switch.nThreadIn = thread; + nLastThread[cpu] = thread; + Switch.nCpu = cpu; + Switch.nTicks = nTicks; + MicroProfileContextSwitchPut(&Switch); + } + } + } + printf("EXITING TRACE THREAD\n"); + S.bContextSwitchRunning = false; + return 0; +} + +bool MicroProfileIsLocalThread(uint32_t nThreadId) +{ + return false; +} + +#endif +#else + +bool MicroProfileIsLocalThread(uint32_t nThreadId){return false;} +void MicroProfileStopContextSwitchTrace(){} +void MicroProfileStartContextSwitchTrace(){} + +#endif + + + + +#if MICROPROFILE_GPU_TIMERS_D3D11 +uint32_t MicroProfileGpuInsertTimeStamp() +{ + MicroProfileD3D11Frame& Frame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame]; + if(Frame.m_nRateQueryStarted) + { + uint32_t nCurrent = (Frame.m_nQueryStart + Frame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES; + uint32_t nNext = (nCurrent + 1) % MICROPROFILE_D3D_MAX_QUERIES; + if(nNext != S.GPU.m_nQueryGet) + { + Frame.m_nQueryCount++; + ID3D11Query* pQuery = (ID3D11Query*)S.GPU.m_pQueries[nCurrent]; + ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext; + pContext->End(pQuery); + S.GPU.m_nQueryPut = nNext; + return nCurrent; + } + } + return (uint32_t)-1; +} + +uint64_t MicroProfileGpuGetTimeStamp(uint32_t nIndex) +{ + if(nIndex == (uint32_t)-1) + { + return (uint64_t)-1; + } + int64_t nResult = S.GPU.m_nQueryResults[nIndex]; + MP_ASSERT(nResult != -1); + return nResult; +} + +bool MicroProfileGpuGetData(void* pQuery, void* pData, uint32_t nDataSize) +{ + HRESULT hr; + do + { + hr = ((ID3D11DeviceContext*)S.GPU.m_pDeviceContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0); + }while(hr == S_FALSE); + switch(hr) + { + case 
DXGI_ERROR_DEVICE_REMOVED: + case DXGI_ERROR_INVALID_CALL: + case E_INVALIDARG: + MP_BREAK(); + return false; + + } + return true; +} + +uint64_t MicroProfileTicksPerSecondGpu() +{ + return S.GPU.m_nQueryFrequency; +} + +void MicroProfileGpuFlip() +{ + MicroProfileD3D11Frame& CurrentFrame = S.GPU.m_QueryFrames[S.GPU.m_nQueryFrame]; + ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)S.GPU.m_pDeviceContext; + if(CurrentFrame.m_nRateQueryStarted) + { + pContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery); + } + uint32_t nNextFrame = (S.GPU.m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY; + MicroProfileD3D11Frame& OldFrame = S.GPU.m_QueryFrames[nNextFrame]; + if(OldFrame.m_nRateQueryStarted) + { + struct RateQueryResult + { + uint64_t nFrequency; + BOOL bDisjoint; + }; + RateQueryResult Result; + if(MicroProfileGpuGetData(OldFrame.m_pRateQuery, &Result, sizeof(Result))) + { + if(S.GPU.m_nQueryFrequency != (int64_t)Result.nFrequency) + { + if(S.GPU.m_nQueryFrequency) + { + OutputDebugString("Query freq changing"); + } + S.GPU.m_nQueryFrequency = Result.nFrequency; + } + uint32_t nStart = OldFrame.m_nQueryStart; + uint32_t nCount = OldFrame.m_nQueryCount; + for(uint32_t i = 0; i < nCount; ++i) + { + uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES; + + + + if(!MicroProfileGpuGetData(S.GPU.m_pQueries[nIndex], &S.GPU.m_nQueryResults[nIndex], sizeof(uint64_t))) + { + S.GPU.m_nQueryResults[nIndex] = -1; + } + } + } + else + { + uint32_t nStart = OldFrame.m_nQueryStart; + uint32_t nCount = OldFrame.m_nQueryCount; + for(uint32_t i = 0; i < nCount; ++i) + { + uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D_MAX_QUERIES; + S.GPU.m_nQueryResults[nIndex] = -1; + } + } + S.GPU.m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D_MAX_QUERIES; + } + + S.GPU.m_nQueryFrame = nNextFrame; + MicroProfileD3D11Frame& NextFrame = S.GPU.m_QueryFrames[nNextFrame]; + pContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery); + 
NextFrame.m_nQueryStart = S.GPU.m_nQueryPut; + NextFrame.m_nQueryCount = 0; + + NextFrame.m_nRateQueryStarted = 1; +} + +void MicroProfileGpuInitD3D11(void* pDevice_, void* pDeviceContext_) +{ + ID3D11Device* pDevice = (ID3D11Device*)pDevice_; + ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_; + S.GPU.m_pDeviceContext = pDeviceContext_; + + D3D11_QUERY_DESC Desc; + Desc.MiscFlags = 0; + Desc.Query = D3D11_QUERY_TIMESTAMP; + for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i) + { + HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_pQueries[i]); + MP_ASSERT(hr == S_OK); + S.GPU.m_nQueryResults[i] = -1; + } + S.GPU.m_nQueryPut = 0; + S.GPU.m_nQueryGet = 0; + S.GPU.m_nQueryFrame = 0; + S.GPU.m_nQueryFrequency = 0; + Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i) + { + S.GPU.m_QueryFrames[i].m_nQueryStart = 0; + S.GPU.m_QueryFrames[i].m_nQueryCount = 0; + S.GPU.m_QueryFrames[i].m_nRateQueryStarted = 0; + HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&S.GPU.m_QueryFrames[i].m_pRateQuery); + MP_ASSERT(hr == S_OK); + } +} + + +void MicroProfileGpuShutdown() +{ + for(uint32_t i = 0; i < MICROPROFILE_D3D_MAX_QUERIES; ++i) + { + ((ID3D11Query*)&S.GPU.m_pQueries[i])->Release(); + S.GPU.m_pQueries[i] = 0; + } + for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i) + { + ((ID3D11Query*)S.GPU.m_QueryFrames[i].m_pRateQuery)->Release(); + S.GPU.m_QueryFrames[i].m_pRateQuery = 0; + } +} + +int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu) +{ + return 0; +} + + +#elif MICROPROFILE_GPU_TIMERS_GL +void MicroProfileGpuInitGL() +{ + S.GPU.GLTimerPos = 0; + glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &S.GPU.GLTimers[0]); +} + +uint32_t MicroProfileGpuInsertTimeStamp() +{ + uint32_t nIndex = (S.GPU.GLTimerPos+1)%MICROPROFILE_GL_MAX_QUERIES; + glQueryCounter(S.GPU.GLTimers[nIndex], GL_TIMESTAMP); + S.GPU.GLTimerPos = nIndex; + return nIndex; +} 
+uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey) +{ + uint64_t result; + glGetQueryObjectui64v(S.GPU.GLTimers[nKey], GL_QUERY_RESULT, &result); + return result; +} + +uint64_t MicroProfileTicksPerSecondGpu() +{ + return 1000000000ll; +} + +int MicroProfileGetGpuTickReference(int64_t* pOutCpu, int64_t* pOutGpu) +{ + int64_t nGpuTimeStamp; + glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp); + if(nGpuTimeStamp) + { + *pOutCpu = MP_TICK(); + *pOutGpu = nGpuTimeStamp; + #if 0 //debug test if timestamp diverges + static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu(); + static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu(); + static int64_t nGpuStart = 0; + static int64_t nCpuStart = 0; + if(!nCpuStart) + { + nCpuStart = *pOutCpu; + nGpuStart = *pOutGpu; + } + static int nCountDown = 100; + if(0 == nCountDown--) + { + int64_t nCurCpu = *pOutCpu; + int64_t nCurGpu = *pOutGpu; + double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu; + double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu; + + char buf[254]; + snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu); + OutputDebugString(buf); + nCountDown = 100; + } + #endif + return 1; + } + return 0; +} + + +#endif + +#undef S + +#ifdef _WIN32 +#pragma warning(pop) +#endif + + + + + +#endif +#endif +#ifdef MICROPROFILE_EMBED_HTML +#include "microprofile_html.h" +#endif diff --git a/externals/microprofile/microprofile_html.h b/externals/microprofile/microprofile_html.h new file mode 100644 index 000000000..01b624b60 --- /dev/null +++ b/externals/microprofile/microprofile_html.h @@ -0,0 +1,3868 @@ +///start file generated from microprofile.html +#ifdef MICROPROFILE_EMBED_HTML +const char g_MicroProfileHtml_begin_0[] = +"<!DOCTYPE HTML>\n" +"<html>\n" +"<head>\n" +"<title>MicroProfile Capture</title>\n" +"<style>\n" +"/* about css: http://bit.ly/1eMQ42U */\n" +"body {margin: 0px;padding: 0px; font: 12px 
Courier New;background-color:#474747; color:white;overflow:hidden;}\n" +"ul {list-style-type: none;margin: 0;padding: 0;}\n" +"li{display: inline; float:left;border:5px; position:relative;text-align:center;}\n" +"a {\n" +" float:left;\n" +" text-decoration:none;\n" +" display: inline;\n" +" text-align: center;\n" +" padding:5px;\n" +" padding-bottom:0px;\n" +" padding-top:0px;\n" +" color: #FFFFFF;\n" +" background-color: #474747;\n" +"}\n" +"a:hover, a:active{\n" +" background-color: #000000;\n" +"}\n" +"\n" +"ul ul {\n" +" position:absolute;\n" +" left:0;\n" +" top:100%;\n" +" margin-left:-999em;\n" +"}\n" +"li:hover ul {\n" +" margin-left:0;\n" +" margin-right:0;\n" +"}\n" +"ul li ul{ display:block;float:none;width:100%;}\n" +"ul li ul li{ display:block;float:none;width:100%;}\n" +"li li a{ display:block;float:none;width:100%;text-align:left;}\n" +"#nav li:hover div {margin-left:0;}\n" +".help {position:absolute;z-index:5;text-align:left;padding:2px;margin-left:-999em;background-color: #313131;width:300px;}\n" +".helpstart {position:absolute;z-index:5;text-align:left;padding:2px;background-color: #313131;width:300px;display:none}\n" +".root {z-index:1;position:absolute;top:0px;left:0px;}\n" +"</style>\n" +"</head>\n" +"<body style=\"\">\n" +"<canvas id=\"History\" height=\"70\" style=\"background-color:#474747;margin:0px;padding:0px;\"></canvas><canvas id=\"DetailedView\" height=\"200\" style=\"background-color:#474747;margin:0px;padding:0px;\"></canvas>\n" +"<div id=\"root\" class=\"root\">\n" +"<ul id=\"nav\">\n" +"<li><a href=\"javascript:void(0)\" onclick=\"ToggleDebugMode();\">?</a>\n" +"<div class=\"helpstart\" id=\"helpwindow\" style=\"left:20px;top:20px\">\n" +"History View:<br>\n" +"Click + Drag: Pan View<br>\n" +"Right Click + Drag : Zoom on region<br>\n" +"Click Frame : Center on frame<br>\n" +"<hr>\n" +"Main View:<br>\n" +"Ctrl + Mouse up/down: Zoom<br>\n" +"Mousewheel : Zoom<br>\n" +"Right Click + Drag: Zoom to region<br>\n" +"Ctrl + Drag: 
Pan<br>\n" +"Click + Drag: Pan<br>\n" +"<hr>\n" +"<table style=\"width:100%\">\n" +"<tr>\n" +"<td width=\"50%\" align=\"left\"><a href=\'javascript:void(0)\' onclick=\"ShowHelp(0, 0);\">Close</a></td>\n" +"<td width=\"50%\" align=\"right\"><a href=\'javascript:void(0)\' onclick=\"ShowHelp(0, 1);\">Close, Never Show</a></td>\n" +"</tr>\n" +"</table>\n" +"</div>\n" +"<div class=\"help\" id=\"divFrameInfo\" style=\"left:20px;top:300px;width:auto;\">\n" +"</div>\n" +"</li>\n" +"<li><a id=\'ModeSubMenuText\'>Mode</a>\n" +" <ul id=\'ModeSubMenu\'>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 0);\" id=\"buttonTimers\">Timers</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 1);\" id=\"buttonGroups\">Groups</a></li> \n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'timers\', 2);\" id=\"buttonThreads\">Threads</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetMode(\'detailed\', 0);\" id=\"buttonDetailed\">Detailed</a></li>\n" +" </ul>\n" +"</li>\n" +"<li><a>Reference</a>\n" +" <ul id=\'ReferenceSubMenu\'>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'5ms\');\">5ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'10ms\');\">10ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'15ms\');\">15ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'20ms\');\">20ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'33ms\');\">33ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'50ms\');\">50ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'100ms\');\">100ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'250ms\');\">250ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"SetReferenceTime(\'500ms\');\">500ms</a></li>\n" +" <li><a href=\"javascript:void(0)\" 
onclick=\"SetReferenceTime(\'1000ms\');\">1000ms</a></li>\n" +" </ul>\n" +"</li>\n" +"<li id=\"ilThreads\"><a>Threads</a>\n" +" <ul id=\"ThreadSubMenu\">\n" +" <li><a href=\"javascript:void(0)\" onclick=\"ToggleThread();\">All</a></li>\n" +" <li><a>---</a></li>\n" +" </ul>\n" +"</li>\n" +"<li id=\"ilGroups\"><a>Groups</a>\n" +" <ul id=\"GroupSubMenu\">\n" +" <li><a href=\"javascript:void(0)\" onclick=\"ToggleGroup();\">All</a></li>\n" +" <li><a>---</a></li>\n" +" </ul>\n" +"</li>\n" +"<li id=\"ilOptions\"><a>Options </a>\n" +" <ul id=\'OptionsMenu\'>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"ToggleContextSwitch();\">Context Switch</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"ToggleDisableMerge();\">MergeDisable</a></li>\n" +" <li><a href=\"javascript:void(0)\" onclick=\"ToggleDisableLod();\">LodDisable</a></li>\n" +" <li id=\'GroupColors\'><a href=\"javascript:void(0)\" onclick=\"ToggleGroupColors();\">Group Colors</a></li>\n" +" <li id=\'TimersMeta\'><a href=\"javascript:void(0)\" onclick=\"ToggleTimersMeta();\">Meta</a></li>\n" +" <li id=\'ShowHelp\'><a href=\"javascript:void(0)\" onclick=\"ShowHelp(1,1);\">Help</a></li>\n" +"<!-- <li><a href=\"javascript:void(0)\" onclick=\"ToggleDebug();\">DEBUG</a></li> -->\n" +" </ul>\n" +"</li>\n" +"</ul>\n" +"</div>\n" +"<script>\n" +"function InvertColor(hexTripletColor) {\n" +" var color = hexTripletColor;\n" +" color = color.substring(1); // remove #\n" +" color = parseInt(color, 16); // convert to integer\n" +" var R = ((color >> 16) % 256)/255.0;\n" +" var G = ((color >> 8) % 256)/255.0;\n" +" var B = ((color >> 0) % 256)/255.0;\n" +" var lum = (0.2126*R + 0.7152*G + 0.0722*B);\n" +" if(lum < 0.7)\n" +" {\n" +" return \'#ffffff\';\n" +" }\n" +" else\n" +" {\n" +" return \'#333333\';\n" +" }\n" +"}\n" +"function InvertColorIndex(hexTripletColor) {\n" +" var color = hexTripletColor;\n" +" color = color.substring(1); // remove #\n" +" color = parseInt(color, 16); // convert to integer\n" +" var R = 
((color >> 16) % 256)/255.0;\n" +" var G = ((color >> 8) % 256)/255.0;\n" +" var B = ((color >> 0) % 256)/255.0;\n" +" var lum = (0.2126*R + 0.7152*G + 0.0722*B);\n" +" if(lum < 0.7)\n" +" {\n" +" return 0;\n" +" }\n" +" else\n" +" {\n" +" return 1;\n" +" }\n" +"}\n" +"function MakeGroup(id, name, category, numtimers, isgpu, total, average, max, color)\n" +"{\n" +" var group = {\"id\":id, \"name\":name, \"category\":category, \"numtimers\":numtimers, \"isgpu\":isgpu, \"total\": total, \"average\" : average, \"max\" : max, \"color\":color};\n" +" return group;\n" +"}\n" +"\n" +"function MakeTimer(id, name, group, color, colordark, average, max, exclaverage, exclmax, callaverage, callcount, total, meta, metaavg, metamax)\n" +"{\n" +" var timer = {\"id\":id, \"name\":name, \"len\":name.length, \"color\":color, \"colordark\":colordark,\"timercolor\":color, \"textcolor\":InvertColor(color), \"group\":group, \"average\":average, \"max\":max, \"exclaverage\":exclaverage, \"exclmax\":exclmax, \"callaverage\":callaverage, \"callcount\":callcount, \"total\":total, \"meta\":meta, \"textcolorindex\":InvertColorIndex(color), \"metaavg\":metaavg, \"metamax\":metamax};\n" +" return timer;\n" +"}\n" +"function MakeFrame(id, framestart, frameend, framestartgpu, frameendgpu, ts, tt, ti)\n" +"{\n" +" var frame = {\"id\":id, \"framestart\":framestart, \"frameend\":frameend, \"framestartgpu\":framestartgpu, \"frameendgpu\":frameendgpu, \"ts\":ts, \"tt\":tt, \"ti\":ti};\n" +" return frame;\n" +"}\n" +"\n" +""; + +const size_t g_MicroProfileHtml_begin_0_size = sizeof(g_MicroProfileHtml_begin_0); +const char* g_MicroProfileHtml_begin[] = { +&g_MicroProfileHtml_begin_0[0], +}; +size_t g_MicroProfileHtml_begin_sizes[] = { +sizeof(g_MicroProfileHtml_begin_0), +}; +size_t g_MicroProfileHtml_begin_count = 1; +const char g_MicroProfileHtml_end_0[] = +"\n" +"\n" +"\n" +"var CanvasDetailedView = document.getElementById(\'DetailedView\');\n" +"var CanvasHistory = 
document.getElementById(\'History\');\n" +"var CanvasDetailedOffscreen = document.createElement(\'canvas\');\n" +"var g_Msg = \'0\';\n" +"\n" +"var Initialized = 0;\n" +"var fDetailedOffset = Frames[0].framestart;\n" +"var fDetailedRange = Frames[0].frameend - fDetailedOffset;\n" +"var nWidth = CanvasDetailedView.width;\n" +"var nHeight = CanvasDetailedView.height;\n" +"var ReferenceTime = 33;\n" +"var nHistoryHeight = 70;\n" +"var nOffsetY = 0;\n" +"var nOffsetBarsX = 0;\n" +"var nOffsetBarsY = 0;\n" +"var nBarsWidth = 80;\n" +"var NameWidth = 200;\n" +"var MouseButtonState = [0,0,0,0,0,0,0,0];\n" +"var KeyShiftDown = 0;\n" +"var MouseDragButton = 0;\n" +"var KeyCtrlDown = 0;\n" +"var FlipToolTip = 0;\n" +"var DetailedViewMouseX = 0;\n" +"var DetailedViewMouseY = 0;\n" +"var HistoryViewMouseX = -1;\n" +"var HistoryViewMouseY = -1;\n" +"var MouseHistory = 0;\n" +"var MouseDetailed = 0;\n" +"var FontHeight = 10;\n" +"var FontWidth = 1;\n" +"var FontAscent = 3; //Set manually\n" +"var Font = \'Bold \' + FontHeight + \'px Courier New\';\n" +"var FontFlash = \'Bold \' + 35 + \'px Courier New\';\n" +"var BoxHeight = FontHeight + 2;\n" +"var ThreadsActive = new Object();\n" +"var ThreadsAllActive = 1;\n" +"var GroupsActive = new Object();\n" +"var GroupsAllActive = 1;\n" +"var nMinWidth = 0.01;//subpixel width\n" +"var nMinWidthPan = 1.0;//subpixel width when panning\n" +"var nContextSwitchEnabled = 1;\n" +"var DisableLod = 0;\n" +"var DisableMerge = 0;\n" +"var GroupColors = 0;\n" +"var nModDown = 0;\n" +"var g_MSG = \'no\';\n" +"var nDrawCount = 0;\n" +"var nBackColors = [\'#474747\', \'#313131\' ];\n" +"var nBackColorOffset = \'#606060\';\n" +"var CSwitchColors =[\"#9DD8AF\",\"#D7B6DA\",\"#EAAC76\",\"#DBDA61\",\"#8AD5E1\",\"#8CE48B\",\"#C4D688\",\"#57E5C4\"];//generated by http://tools.medialab.sciences-po.fr/iwanthue/index.php\n" +"var CSwitchHeight = 5;\n" +"var FRAME_HISTORY_COLOR_CPU = \'#ff7f27\';\n" +"var FRAME_HISTORY_COLOR_GPU = \'#ffffff\';\n" +"var ZOOM_TIME 
= 0.5;\n" +"var AnimationActive = false;\n" +"var nHoverCSCpu = -1;\n" +"var nHoverCSCpuNext = -1;\n" +"var nHoverCSToolTip = null;\n" +"var nHoverToken = -1;\n" +"var nHoverFrame = -1;\n" +"var nHoverTokenIndex = -1;\n" +"var nHoverTokenLogIndex = -1;\n" +"var nHoverCounter = 0;\n" +"var nHoverCounterDelta = 8;\n" +"var nHoverTokenNext = -1;\n" +"var nHoverTokenLogIndexNext = -1;\n" +"var nHoverTokenIndexNext = -1;\n" +"var nHideHelp = 0;\n" +"\n" +"\n" +"var fFrameScale = 33.33;\n" +"var fRangeBegin = 0;\n" +"var fRangeEnd = -1;\n" +"var fRangeBeginNext = 0;\n" +"var fRangeEndNext = 0;\n" +"var fRangeBeginGpuNext = 0;\n" +"var fRangeEndGpuNext = 0;\n" +"var fRangeBeginHistory = -1;\n" +"var fRangeEndHistory = -1;\n" +"var fRangeBeginHistoryGpu = -1;\n" +"var fRangeEndHistoryGpu = -1;\n" +"var fRangeBeginSelect = 0;\n" +"var fRangeEndSelect = -1;\n" +"\n" +"var ModeDetailed = 0;\n" +"var ModeTimers = 1;\n" +"var Mode = ModeDetailed;\n" +"\n" +"var DebugDrawQuadCount = 0;\n" +"var DebugDrawTextCount = 0;\n" +"var ProfileMode = 0;\n" +"var ProfileFps = 0;\n" +"var ProfileFpsAggr = 0;\n" +"var ProfileFpsCount = 0;\n" +"var ProfileLastTimeStamp = new Date();\n" +"\n" +"var CSwitchCache = {};\n" +"var CSwitchOnlyThreads = [];\n" +"var ProfileData = {};\n" +"var ProfileStackTime = {};\n" +"var ProfileStackName = {};\n" +"var Debug = 1;\n" +"\n" +"var g_MaxStack = Array();\n" +"var g_TypeArray;\n" +"var g_TimeArray;\n" +"var g_IndexArray;\n" +"var LodData = new Array();\n" +"var NumLodSplits = 10;\n" +"var SplitMin = 100;\n" +"var SPLIT_LIMIT = 1e20;\n" +"var DPR = 1;\n" +"var DetailedRedrawState = {};\n" +"var OffscreenData;\n" +"var DetailedFrameCounter = 0;\n" +"var Invalidate = 0;\n" +"var GroupOrder = Array();\n" +"var ThreadOrder = Array();\n" +"var TimersGroups = 0;\n" +"var TimersMeta = 1;\n" +"var MetaLengths = Array();\n" +"var MetaLengthsAvg = Array();\n" +"var MetaLengthsMax = Array();\n" +"\n" +"\n" +"function ProfileModeClear()\n" +"{\n" +" 
if(ProfileMode)\n" +" {\n" +" ProfileData = new Object();\n" +" ProfileStackTime = new Array();\n" +" ProfileStackName = new Array();\n" +" }\n" +"}\n" +"function ProfileEnter(Name)\n" +"{\n" +" if(ProfileMode)\n" +" {\n" +" ProfileStackTime.push(new Date());\n" +" ProfileStackName.push(Name);\n" +" }\n" +"}\n" +"function ProfileLeave()\n" +"{\n" +" if(ProfileMode)\n" +" {\n" +" var Time = new Date();\n" +" var Delta = Time - ProfileStackTime.pop();\n" +" var Name = ProfileStackName.pop();\n" +" var Obj = ProfileData[Name];\n" +" if(!Obj)\n" +" {\n" +" Obj = new Object();\n" +" Obj.Count = 0;\n" +" Obj.Name = Name;\n" +" Obj.Time = 0;\n" +" ProfileData[Name] = Obj;\n" +" }\n" +" Obj.Time += Delta;\n" +" Obj.Count += 1;\n" +" }\n" +"}\n" +"\n" +"function ProfilePlot(s)\n" +"{\n" +" if(ProfileMode)\n" +" {\n" +" var A = ProfileData.Plot;\n" +" if(!A)\n" +" {\n" +" ProfileData.Plot = Array();\n" +" A = ProfileData.Plot;\n" +" }\n" +" if(A.length<10)\n" +" {\n" +" A.push(s);\n" +" }\n" +" }\n" +"}\n" +"function ProfileModeDump()\n" +"{\n" +" for(var idx in ProfileData)\n" +" {\n" +" var Timer = ProfileData[idx];\n" +" console.log(Timer.Name + \" \" + Timer.Time + \"ms \" + Timer.Count);\n" +" }\n" +"\n" +"}\n" +"function ProfileModeDraw(Canvas)\n" +"{\n" +" if(ProfileMode)\n" +" {\n" +" var StringArray = [];\n" +" for(var idx in ProfileData)\n" +" {\n" +" if(idx == \"Plot\")\n" +" continue;\n" +" var Timer = ProfileData[idx];\n" +" StringArray.push(Timer.Name);\n" +" StringArray.push(Timer.Time + \"ms\");\n" +" StringArray.push(\"#\");\n" +" StringArray.push(\"\" + Timer.Count);\n" +" }\n" +" StringArray.push(\"debug\");\n" +" StringArray.push(Debug);\n" +" var Time = new Date();\n" +" var Delta = Time - ProfileLastTimeStamp;\n" +" ProfileLastTimeStamp = Time;\n" +" StringArray.push(\"Frame Delta\");\n" +" StringArray.push(Delta + \"ms\");\n" +" if(ProfileMode == 2)\n" +" {\n" +" ProfileFpsAggr += Delta;\n" +" ProfileFpsCount ++ ;\n" +" var AggrFrames = 10;\n" +" 
if(ProfileFpsCount == AggrFrames)\n" +" {\n" +" ProfileFps = 1000 / (ProfileFpsAggr / AggrFrames);\n" +" ProfileFpsAggr = 0;\n" +" ProfileFpsCount = 0;\n" +" }\n" +" StringArray.push(\"FPS\");\n" +" StringArray.push(\"\" + ProfileFps.toFixed(2));\n" +" }\n" +"\n" +"\n" +" for(var i = 0; i < ProfileData.Plot; ++i)\n" +" {\n" +" StringArray.push(\"\");\n" +" StringArray.push(ProfileData.Plot[i]);\n" +" }\n" +" ProfileData.Plot = Array();\n" +" DrawToolTip(StringArray, Canvas, 0, 200);\n" +" }\n" +"}\n" +"\n" +"function ToggleDebugMode()\n" +"{\n" +" ProfileMode = (ProfileMode+1)%4;\n" +" console.log(\'Toggle Debug Mode \' + ProfileMode);\n" +"}\n" +"\n" +"function DetailedTotal()\n" +"{\n" +" var Total = 0;\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" var frfr = Frames[i];\n" +" Total += frfr.frameend - frfr.framestart;\n" +" }\n" +" return Total;\n" +"}\n" +"\n" +"function InitFrameInfo()\n" +"{\n" +"\n" +" var div = document.getElementById(\'divFrameInfo\');\n" +" var txt = \'\';\n" +" txt = txt + \'Timers View\' + \'<br>\';\n" +" txt = txt + \'Frames:\' + AggregateInfo.Frames +\'<br>\';\n" +" txt = txt + \'Time:\' + AggregateInfo.Time.toFixed(2) +\'ms<br>\';\n" +" txt = txt + \'<hr>\';\n" +" txt = txt + \'Detailed View\' + \'<br>\';\n" +" txt = txt + \'Frames:\' + Frames.length +\'<br>\';\n" +" txt = txt + \'Time:\' + DetailedTotal().toFixed(2) +\'ms<br>\';\n" +" div.innerHTML = txt;\n" +"}\n" +"function InitGroups()\n" +"{\n" +" for(groupid in GroupInfo)\n" +" {\n" +" var TimerArray = Array();\n" +" for(timerid in TimerInfo)\n" +" {\n" +" if(TimerInfo[timerid].group == groupid)\n" +" {\n" +" TimerArray.push(timerid);\n" +" }\n" +" }\n" +" GroupInfo[groupid].TimerArray = TimerArray;\n" +" }\n" +"}\n" +"\n" +"function InitThreadMenu()\n" +"{\n" +" var ulThreadMenu = document.getElementById(\'ThreadSubMenu\');\n" +" var MaxLen = 7;\n" +" ThreadOrder = CreateOrderArray(ThreadNames, function(a){return a;});\n" +" for(var idx in ThreadOrder)\n" +" {\n" +" 
var name = ThreadNames[ThreadOrder[idx]];\n" +" var li = document.createElement(\'li\');\n" +" if(name.length > MaxLen)\n" +" {\n" +" MaxLen = name.length;\n" +" }\n" +" li.innerText = name;\n" +" var asText = li.innerHTML;\n" +" var html = \'<a href=\"javascript:void(0)\" onclick=\"ToggleThread(\\'\' + name + \'\\');\">\' + asText + \'</a>\';\n" +" li.innerHTML = html;\n" +" ulThreadMenu.appendChild(li);\n" +" }\n" +" var LenStr = (5+(1+MaxLen) * (1+FontWidth)) + \'px\';\n" +" var Lis = ulThreadMenu.getElementsByTagName(\'li\');\n" +" for(var i = 0; i < Lis.length; ++i)\n" +" {\n" +" Lis[i].style[\'width\'] = LenStr;\n" +" }\n" +"}\n" +"\n" +"function UpdateThreadMenu()\n" +"{\n" +" var ulThreadMenu = document.getElementById(\'ThreadSubMenu\');\n" +" var as = ulThreadMenu.getElementsByTagName(\'a\');\n" +" for(var i = 0; i < as.length; ++i)\n" +" {\n" +" var elem = as[i];\n" +" var inner = elem.innerText;\n" +" var bActive = false;\n" +" if(i < 2)\n" +" {\n" +" if(inner == \'All\')\n" +" {\n" +" bActive = ThreadsAllActive;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" bActive = ThreadsActive[inner];\n" +" }\n" +" if(bActive)\n" +" {\n" +" elem.style[\'text-decoration\'] = \'underline\';\n" +" }\n" +" else\n" +" {\n" +" elem.style[\'text-decoration\'] = \'none\';\n" +" }\n" +" }\n" +"}\n" +"\n" +"function ToggleThread(ThreadName)\n" +"{\n" +" if(ThreadName)\n" +" {\n" +" if(ThreadsActive[ThreadName])\n" +" {\n" +" ThreadsActive[ThreadName] = false;\n" +" }\n" +" else\n" +" {\n" +" ThreadsActive[ThreadName] = true;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(ThreadsAllActive)\n" +" {\n" +" ThreadsAllActive = 0;\n" +" }\n" +" else\n" +" {\n" +" ThreadsAllActive = 1;\n" +" }\n" +" }\n" +" Invalidate = 0;\n" +" UpdateThreadMenu();\n" +" WriteCookie();\n" +" Draw(1);\n" +"\n" +"}\n" +"\n" +"function CreateOrderArray(Source, NameFunc)\n" +"{\n" +" var Temp = Array(Source.length);\n" +" for(var i = 0; i < Source.length; ++i)\n" +" {\n" +" Temp[i] = {};\n" +" Temp[i].index = 
i;\n" +" Temp[i].namezz = NameFunc(Source[i]).toLowerCase();\n" +" }\n" +" Temp.sort(function(l, r)\n" +" { \n" +" if(r.namezz<l.namezz)\n" +" {return 1;}\n" +" if(l.namezz<r.namezz)\n" +" {return -1;} \n" +" return 0;\n" +" } );\n" +" var OrderArray = Array(Source.length);\n" +" for(var i = 0; i < Source.length; ++i)\n" +" {\n" +" OrderArray[i] = Temp[i].index;\n" +" }\n" +" return OrderArray;\n" +"}\n" +"\n" +"\n" +"function InitGroupMenu()\n" +"{\n" +" var ulGroupMenu = document.getElementById(\'GroupSubMenu\');\n" +" var MaxLen = 7;\n" +" var MenuArray = Array();\n" +" for(var i = 0; i < GroupInfo.length; ++i)\n" +" {\n" +" var x = {};\n" +" x.IsCategory = 0;\n" +" x.category = GroupInfo[i].category;\n" +" x.name = GroupInfo[i].name;\n" +" x.index = i;\n" +" MenuArray.push(x);\n" +" }\n" +" for(var i = 0; i < CategoryInfo.length; ++i)\n" +" {\n" +" var x = {};\n" +" x.IsCategory = 1;\n" +" x.category = i;\n" +" x.name = CategoryInfo[i];\n" +" x.index = i;\n" +" MenuArray.push(x);\n" +" }\n" +" var OrderFunction = function(a){ return a.category + \"__\" + a.name; };\n" +" var OrderFunctionMenu = function(a){ return a.IsCategory ? 
(a.category + \'\') : (a.category + \"__\" + a.name); };\n" +" GroupOrder = CreateOrderArray(GroupInfo, OrderFunction);\n" +" var MenuOrder = CreateOrderArray(MenuArray, OrderFunctionMenu);\n" +"\n" +" for(var idx in MenuOrder)\n" +" {\n" +" var MenuItem = MenuArray[MenuOrder[idx]];\n" +" var name = MenuItem.name;\n" +" var li = document.createElement(\'li\');\n" +" if(name.length > MaxLen)\n" +" {\n" +" MaxLen = name.length;\n" +" }\n" +" var jsfunc = \'\';\n" +" if(MenuItem.IsCategory)\n" +" { \n" +" li.innerText = \'[\' + name + \']\';\n" +" jsfunc = \"ToggleCategory\";\n" +" }\n" +" else\n" +" {\n" +" li.innerText = name;\n" +" jsfunc = \"ToggleGroup\";\n" +" }\n" +" var asText = li.innerHTML;\n" +" var html = \'<a href=\"javascript:void(0)\" onclick=\"\' + jsfunc + \'(\\'\' + name + \'\\');\">\' + asText + \'</a>\';\n" +" li.innerHTML = html;\n" +" ulGroupMenu.appendChild(li);\n" +" }\n" +" var LenStr = (5+(1+MaxLen) * FontWidth) + \'px\';\n" +" var Lis = ulGroupMenu.getElementsByTagName(\'li\');\n" +" for(var i = 0; i < Lis.length; ++i)\n" +" {\n" +" Lis[i].style[\'width\'] = LenStr;\n" +" }\n" +" UpdateGroupMenu();\n" +"}\n" +"\n" +"function UpdateGroupMenu()\n" +"{\n" +" var ulThreadMenu = document.getElementById(\'GroupSubMenu\');\n" +" var as = ulThreadMenu.getElementsByTagName(\'a\');\n" +" for(var i = 0; i < as.length; ++i)\n" +" {\n" +" var elem = as[i];\n" +" var inner = elem.innerText;\n" +" var bActive = false;\n" +" if(i < 2)\n" +" {\n" +" if(inner == \'All\')\n" +" {\n" +" bActive = GroupsAllActive;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" var CategoryString = inner.length>2 ? 
inner.substring(1, inner.length-2) : \"\";\n" +" var CategoryIdx = CategoryIndex(CategoryString);\n" +" if(inner[0] == \'[\' && inner[inner.length-1] == \']\' && CategoryIdx >= 0)\n" +" {\n" +" bActive = IsCategoryActive(CategoryIdx);\n" +" }\n" +" else\n" +" {\n" +" bActive = GroupsActive[inner];\n" +" }\n" +" }\n" +" if(bActive)\n" +" {\n" +" elem.style[\'text-decoration\'] = \'underline\';\n" +" }\n" +" else\n" +" {\n" +" elem.style[\'text-decoration\'] = \'none\';\n" +" }\n" +" }\n" +"}\n" +"function CategoryIndex(CategoryName)\n" +"{\n" +" for(var i = 0; i < CategoryInfo.length; ++i)\n" +" {\n" +" if(CategoryInfo[i] == CategoryName)\n" +" {\n" +" return i;\n" +" }\n" +" }\n" +" return -1;\n" +"}\n" +"function IsCategoryActive(CategoryIdx)\n" +"{\n" +" for(var i = 0; i < GroupInfo.length; ++i)\n" +" {\n" +" if(GroupInfo[i].category == CategoryIdx)\n" +" {\n" +" var Name = GroupInfo[i].name;\n" +" if(!GroupsActive[Name])\n" +" {\n" +" return false;\n" +" }\n" +" }\n" +" }\n" +" return true;\n" +"\n" +"}\n" +"function ToggleCategory(CategoryName)\n" +"{\n" +" var CategoryIdx = CategoryIndex(CategoryName);\n" +" if(CategoryIdx < 0)\n" +" return;\n" +" var CategoryActive = IsCategoryActive(CategoryIdx);\n" +" for(var i = 0; i < GroupInfo.length; ++i)\n" +" {\n" +" if(GroupInfo[i].category == CategoryIdx)\n" +" {\n" +" var Name = GroupInfo[i].name;\n" +" if(CategoryActive)\n" +" {\n" +" GroupsActive[Name] = false;\n" +" }\n" +" else\n" +" {\n" +" GroupsActive[Name] = true;\n" +" }\n" +" }\n" +" }\n" +" UpdateGroupMenu();\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"}\n" +"\n" +"function ToggleGroup(GroupName)\n" +"{\n" +" if(GroupName)\n" +" {\n" +" if(GroupsActive[GroupName])\n" +" {\n" +" GroupsActive[GroupName] = false;\n" +" }\n" +" else\n" +" {\n" +" GroupsActive[GroupName] = true;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(GroupsAllActive)\n" +" {\n" +" GroupsAllActive = 0;\n" +" }\n" +" else\n" +" {\n" +" GroupsAllActive = 1;\n" +" }\n" +" }\n" +" 
UpdateGroupMenu();\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"}\n" +"function UpdateGroupColors()\n" +"{\n" +" for(var i = 0; i < TimerInfo.length; ++i)\n" +" {\n" +" if(GroupColors)\n" +" {\n" +" TimerInfo[i].color = GroupInfo[TimerInfo[i].group].color;\n" +" }\n" +" else\n" +" {\n" +" TimerInfo[i].color = TimerInfo[i].timercolor;\n" +" }\n" +" TimerInfo[i].textcolorindex = InvertColorIndex(TimerInfo[i].color);\n" +" }\n" +"}\n" +"\n" +"function ToggleGroupColors()\n" +"{\n" +" GroupColors = !GroupColors;\n" +" UpdateGroupColors();\n" +" UpdateOptionsMenu();\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"}\n" +"\n" +"function UpdateOptionsMenu()\n" +"{\n" +" var ulTimersMeta = document.getElementById(\'TimersMeta\');\n" +" ulTimersMeta.style[\'text-decoration\'] = TimersMeta ? \'underline\' : \'none\';\n" +" var ulGroupColors = document.getElementById(\'GroupColors\');\n" +" ulGroupColors.style[\'text-decoration\'] = GroupColors ? \'underline\' : \'none\';\n" +"}\n" +"\n" +"function ToggleTimersMeta()\n" +"{\n" +" TimersMeta = TimersMeta ? 0 : 1;\n" +" WriteCookie();\n" +" UpdateOptionsMenu();\n" +" RequestRedraw();\n" +"}\n" +"\n" +"function ShowHelp(Show, Forever)\n" +"{\n" +" var HelpWindow = document.getElementById(\'helpwindow\');\n" +" if(Show)\n" +" {\n" +" HelpWindow.style[\'display\'] = \'block\';\n" +" }\n" +" else\n" +" {\n" +" HelpWindow.style[\'display\'] = \'none\';\n" +" }\n" +" if(Forever)\n" +" {\n" +" nHideHelp = Show ? 
0 : 1;\n" +" WriteCookie();\n" +" }\n" +"}\n" +"function SetMode(NewMode, Groups)\n" +"{\n" +" var buttonTimers = document.getElementById(\'buttonTimers\');\n" +" var buttonDetailed = document.getElementById(\'buttonDetailed\');\n" +" var buttonGroups = document.getElementById(\'buttonGroups\');\n" +" var buttonThreads = document.getElementById(\'buttonThreads\');\n" +" var ilThreads = document.getElementById(\'ilThreads\');\n" +" var ilGroups = document.getElementById(\'ilGroups\');\n" +" var ModeElement = null;\n" +" if(NewMode == \'timers\' || NewMode == ModeTimers)\n" +" {\n" +" TimersGroups = Groups;\n" +" buttonTimers.style[\'text-decoration\'] = TimersGroups ? \'none\' : \'underline\';\n" +" buttonGroups.style[\'text-decoration\'] = TimersGroups == 1 ? \'underline\' : \'none\';\n" +" buttonThreads.style[\'text-decoration\'] = TimersGroups == 2 ? \'underline\' : \'none\';\n" +" buttonDetailed.style[\'text-decoration\'] = \'none\';\n" +" if(TimersGroups == 0)\n" +" {\n" +" ilThreads.style[\'display\'] = \'none\';\n" +" }\n" +" else\n" +" {\n" +" ilThreads.style[\'display\'] = \'block\';\n" +" }\n" +" ilGroups.style[\'display\'] = \'block\';\n" +" Mode = ModeTimers;\n" +" ModeElement = TimersGroups == 2 ? buttonThreads : TimersGroups == 1 ? 
buttonGroups : buttonTimers;\n" +" }\n" +" else if(NewMode == \'detailed\' || NewMode == ModeDetailed)\n" +" {\n" +" buttonTimers.style[\'text-decoration\'] = \'none\';\n" +" buttonGroups.style[\'text-decoration\'] = \'none\';\n" +" buttonThreads.style[\'text-decoration\'] = \'none\';\n" +" buttonDetailed.style[\'text-decoration\'] = \'underline\';\n" +" ilThreads.style[\'display\'] = \'block\';\n" +" ilGroups.style[\'display\'] = \'none\';\n" +" Mode = ModeDetailed;\n" +" ModeElement = buttonDetailed;\n" +" }\n" +" var ModeSubMenuText = document.getElementById(\'ModeSubMenuText\');\n" +" ModeSubMenuText.innerText = \'Mode[\' + ModeElement.innerText + \']\';\n" +"\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"\n" +"}\n" +"\n" +"function SetReferenceTime(TimeString)\n" +"{\n" +" ReferenceTime = parseInt(TimeString);\n" +" var ReferenceMenu = document.getElementById(\'ReferenceSubMenu\');\n" +" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n" +" for(var i = 0; i < Links.length; ++i)\n" +" {\n" +" if(Links[i].innerHTML.match(\'^\' + TimeString))\n" +" {\n" +" Links[i].style[\'text-decoration\'] = \'underline\';\n" +" }\n" +" else\n" +" {\n" +" Links[i].style[\'text-decoration\'] = \'none\';\n" +" }\n" +" }\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"\n" +"}\n" +"\n" +"function ToggleContextSwitch()\n" +"{\n" +" SetContextSwitch(nContextSwitchEnabled ? 0 : 1);\n" +"}\n" +"function SetContextSwitch(Enabled)\n" +"{\n" +" nContextSwitchEnabled = Enabled ? 1 : 0;\n" +" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n" +" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n" +" Links[0].style[\'text-decoration\'] = nContextSwitchEnabled ? \'underline\' : \'none\';\n" +" WriteCookie();\n" +" RequestRedraw();\n" +"}\n" +"\n" +"function ToggleDebug()\n" +"{\n" +" Debug = (Debug + 1) % 2;\n" +"}\n" +"\n" +"function ToggleDisableMerge()\n" +"{\n" +" DisableMerge = DisableMerge ? 
0 : 1;\n" +" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n" +" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n" +" if(DisableMerge)\n" +" {\n" +" Links[1].style[\'text-decoration\'] = \'underline\';\n" +" }\n" +" else\n" +" {\n" +" Links[1].style[\'text-decoration\'] = \'none\';\n" +" }\n" +"\n" +"}\n" +"\n" +"function ToggleDisableLod()\n" +"{\n" +" DisableLod = DisableLod ? 0 : 1;\n" +" var ReferenceMenu = document.getElementById(\'OptionsMenu\');\n" +" var Links = ReferenceMenu.getElementsByTagName(\'a\');\n" +" if(DisableLod)\n" +" {\n" +" Links[2].style[\'text-decoration\'] = \'underline\';\n" +" }\n" +" else\n" +" {\n" +" Links[2].style[\'text-decoration\'] = \'none\';\n" +" }\n" +"\n" +"}\n" +"\n" +"function GatherHoverMetaCounters(TimerIndex, StartIndex, nLog, nFrameLast)\n" +"{\n" +" var HoverInfo = new Object();\n" +" var StackPos = 1;\n" +" //search backwards, count meta counters \n" +" for(var i = nFrameLast; i >= 0; i--)\n" +" {\n" +" var fr = Frames[i];\n" +" var ts = fr.ts[nLog];\n" +" var ti = fr.ti[nLog];\n" +" var tt = fr.tt[nLog];\n" +" var start = i == nFrameLast ? 
StartIndex-1 : ts.length-1;\n" +"\n" +" for(var j = start; j >= 0; j--)\n" +" {\n" +" var type = tt[j];\n" +" var index = ti[j];\n" +" var time = ts[j];\n" +" if(type == 1)\n" +" {\n" +" StackPos--;\n" +" if(StackPos == 0 && index == TimerIndex)\n" +" {\n" +" return HoverInfo;\n" +" }\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" StackPos++;\n" +" }\n" +" else if(type > 3)\n" +" {\n" +" var nMetaCount = type - 3;\n" +" var nMetaIndex = MetaNames[index];\n" +" if(nMetaIndex in HoverInfo)\n" +" {\n" +" HoverInfo[nMetaIndex] += nMetaCount;\n" +" }\n" +" else\n" +" {\n" +" HoverInfo[nMetaIndex] = nMetaCount;\n" +" }\n" +" }\n" +" }\n" +" }\n" +"}\n" +"function CalculateAllTimers(fBegin, fEnd)\n" +"{\n" +" var Sum = [];\n" +" var Count = [];\n" +" var Sorted = [];\n" +" for(var i = 0; i < TimerInfo.length; ++i)\n" +" {\n" +" Sum.push(0.0);\n" +" Count.push(0);\n" +" Sorted.push(i);\n" +" }\n" +" var nFrameFirst = 0;\n" +" var nFrameLast = Frames.length;\n" +"\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var StackPosArray = Array(nNumLogs);\n" +" var StackArray = Array(nNumLogs);\n" +" for(var i = 0; i < nNumLogs; ++i)\n" +" {\n" +" StackPosArray[i] = 0;\n" +" StackArray[i] = Array(20);\n" +" }\n" +"\n" +" for(var i = nFrameFirst; i < nFrameLast; i++)\n" +" {\n" +" var fr = Frames[i];\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var StackPos = StackPosArray[nLog];\n" +" var Stack = StackArray[nLog];\n" +" var ts = fr.ts[nLog];\n" +" var ti = fr.ti[nLog];\n" +" var tt = fr.tt[nLog];\n" +" var count = ts.length;\n" +" for(j = 0; j < count; j++)\n" +" {\n" +" var type = tt[j];\n" +" var index = ti[j];\n" +" var time = ts[j];\n" +" if(type == 1 && time < fEnd) //enter\n" +" {\n" +" Stack[StackPos] = time < fBegin ? 
fBegin : time;\n" +" if(StackArray[nLog][StackPos] != time)\n" +" {\n" +" console.log(\'fail fail fail\');\n" +" }\n" +" StackPos++;\n" +" }\n" +" else if(type == 0) // leave\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +" var timeend = time;\n" +" StackPos--;\n" +" timestart = Stack[StackPos];\n" +" var TimeDelta = timeend - timestart;\n" +" Sum[index] += TimeDelta;\n" +" Count[index]++;\n" +" }\n" +" }\n" +" }\n" +" StackPosArray[nLog] = StackPos;\n" +" }\n" +" }\n" +" Sorted.sort(function(a,b){ return Sum[b] - Sum[a]; } );\n" +" var Result = {\"Sorted\" : Sorted, \"Sum\" : Sum, \"Count\" : Count};\n" +" return Result;\n" +"}\n" +"function CalculateTimers(Result, TimerIndex, nFrameFirst, nFrameLast)\n" +"{\n" +" if(!nFrameFirst || nFrameFirst < 0)\n" +" nFrameFirst = 0;\n" +" if(!nFrameLast || nFrameLast > Frames.length)\n" +" nFrameLast = Frames.length;\n" +" var FrameCount = nFrameLast - nFrameFirst;\n" +" if(0 == FrameCount)\n" +" return;\n" +" var CallCount = 0;\n" +" var Sum = 0;\n" +" var Max = 0;\n" +" var FrameMax = 0;\n" +"\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var StackPosArray = Array(nNumLogs);\n" +" var StackArray = Array(nNumLogs);\n" +" for(var i = 0; i < nNumLogs; ++i)\n" +" {\n" +" StackPosArray[i] = 0;\n" +" StackArray[i] = Array(20);\n" +" }\n" +"\n" +" for(var i = nFrameFirst; i < nFrameLast; i++)\n" +" {\n" +" var FrameSum = 0;\n" +" var fr = Frames[i];\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var StackPos = StackPosArray[nLog];\n" +" var Stack = StackArray[nLog];\n" +" var ts = fr.ts[nLog];\n" +" var ti = fr.ti[nLog];\n" +" var tt = fr.tt[nLog];\n" +" var count = ts.length;\n" +" for(j = 0; j < count; j++)\n" +" {\n" +" var type = tt[j];\n" +" var index = ti[j];\n" +" var time = ts[j];\n" +" if(type == 1) //enter\n" +" {\n" +" //push\n" +" Stack[StackPos] = time;\n" +" if(StackArray[nLog][StackPos] != time)\n" +" {\n" +" console.log(\'fail fail fail\');\n" +" }\n" +" StackPos++;\n" +" }\n" +" else if(type == 0) // 
leave\n" +" {\n" +" var timestart;\n" +" var timeend = time;\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +" timestart = Stack[StackPos];\n" +" }\n" +" else\n" +" {\n" +" timestart = Frames[nFrameFirst].framestart;\n" +" }\n" +" if(index == TimerIndex)\n" +" {\n" +" var TimeDelta = timeend - timestart;\n" +" CallCount++;\n" +" FrameSum += TimeDelta;\n" +" Sum += TimeDelta;\n" +" if(TimeDelta > Max)\n" +" Max = TimeDelta;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" //meta\n" +" }\n" +" }\n" +" if(FrameSum > FrameMax)\n" +" {\n" +" FrameMax = FrameSum;\n" +" }\n" +" StackPosArray[nLog] = StackPos;\n" +" }\n" +" }\n" +"\n" +" Result.CallCount = CallCount;\n" +" Result.Sum = Sum.toFixed(3);\n" +" Result.Max = Max.toFixed(3);\n" +" Result.Average = (Sum / CallCount).toFixed(3);\n" +" Result.FrameAverage = (Sum / FrameCount).toFixed(3);\n" +" Result.FrameCallAverage = (CallCount / FrameCount).toFixed(3);\n" +" Result.FrameMax = FrameMax.toFixed(3);\n" +" return Result;\n" +"}\n" +"\n" +"function PreprocessCalculateAllTimers()\n" +"{\n" +" ProfileEnter(\"CalculateAllTimers\");\n" +" var nFrameFirst = 0;\n" +" var nFrameLast = Frames.length;\n" +" var FrameCount = nFrameLast - nFrameFirst;\n" +" if(0 == FrameCount)\n" +" return;\n" +" for(var j = 0; j < TimerInfo.length; j++)\n" +" {\n" +" TimerInfo[j].CallCount = 0;\n" +" TimerInfo[j].Sum = 0;\n" +" TimerInfo[j].Max = 0;\n" +" TimerInfo[j].FrameMax = 0;\n" +" }\n" +"\n" +"\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var StackPosArray = Array(nNumLogs);\n" +" var StackArray = Array(nNumLogs);\n" +" for(var i = 0; i < nNumLogs; ++i)\n" +" {\n" +" StackPosArray[i] = 0;\n" +" StackArray[i] = Array(20);\n" +" }\n" +"\n" +" for(var i = nFrameFirst; i < nFrameLast; i++)\n" +" {\n" +" for(var j = 0; j < TimerInfo.length; j++)\n" +" {\n" +" TimerInfo[j].FrameSum = 0;\n" +" }\n" +"\n" +" var fr = Frames[i];\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var StackPos = StackPosArray[nLog];\n" +" var Stack = 
StackArray[nLog];\n" +" var ts = fr.ts[nLog];\n" +" var ti = fr.ti[nLog];\n" +" var tt = fr.tt[nLog];\n" +" var count = ts.length;\n" +" for(j = 0; j < count; j++)\n" +" {\n" +" var type = tt[j];\n" +" var index = ti[j];\n" +" var time = ts[j];\n" +" if(type == 1) //enter\n" +" {\n" +" //push\n" +" Stack[StackPos] = time;\n" +" if(StackArray[nLog][StackPos] != time)\n" +" {\n" +" console.log(\'fail fail fail\');\n" +" }\n" +" StackPos++;\n" +" }\n" +" else if(type == 0) // leave\n" +" {\n" +" var timestart;\n" +" var timeend = time;\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +" timestart = Stack[StackPos];\n" +" }\n" +" else\n" +" {\n" +" timestart = Frames[nFrameFirst].framestart;\n" +" }\n" +" // if(index == TimerIndex)\n" +" {\n" +" var TimeDelta = timeend - timestart;\n" +" TimerInfo[index].CallCount++;\n" +" TimerInfo[index].FrameSum += TimeDelta;\n" +" TimerInfo[index].Sum += TimeDelta;\n" +" if(TimeDelta > TimerInfo[index].Max)\n" +" TimerInfo[index].Max = TimeDelta;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" //meta\n" +" }\n" +" }\n" +" for(var j = 0; j < TimerInfo.length; j++)\n" +" {\n" +" if(TimerInfo[j].FrameSum > TimerInfo[j].FrameMax)\n" +" {\n" +" TimerInfo[j].FrameMax = TimerInfo[j].FrameSum;\n" +" }\n" +" }\n" +" StackPosArray[nLog] = StackPos;\n" +" }\n" +"\n" +"\n" +" }\n" +"\n" +" for(var j = 0; j < TimerInfo.length; j++)\n" +" {\n" +" var CallCount = TimerInfo[j].CallCount;\n" +" var Sum = TimerInfo[j].Sum.toFixed(3);\n" +" var Max = TimerInfo[j].Max.toFixed(3);\n" +" var Average = (TimerInfo[j].Sum / TimerInfo[j].CallCount).toFixed(3);\n" +" var FrameAverage = (TimerInfo[j].Sum / FrameCount).toFixed(3);\n" +" var FrameCallAverage = (TimerInfo[j].CallCount / FrameCount).toFixed(3);\n" +" var FrameMax = TimerInfo[j].FrameMax.toFixed(3);\n" +" TimerInfo[j].CallCount = CallCount;\n" +" TimerInfo[j].Sum = Sum;\n" +" TimerInfo[j].Max = Max ;\n" +" TimerInfo[j].Average = Average;\n" +" TimerInfo[j].FrameAverage = FrameAverage;\n" +" 
TimerInfo[j].FrameCallAverage = FrameCallAverage;\n" +" TimerInfo[j].FrameMax = FrameMax;\n" +" }\n" +" ProfileLeave();\n" +"}\n" +"\n" +"var FlashFrames = 10;\n" +"var FlashFrameCounter = 0;\n" +"var FlashMessage = \'\';\n" +"function TimeString(Diff)\n" +"{\n" +" var DiffString = \"0 sec\";\n" +" var DiffTable = [1,60,60*60,60*60*24];\n" +" var DiffNameTable = [\"sec\", \"min\", \"hr\", \"day\"];\n" +" for(var i = 0; i < DiffTable.length; ++i)\n" +" {\n" +" if(Diff >= DiffTable[i])\n" +" {\n" +" DiffString = Math.floor(Diff / DiffTable[i]) + \" \" + DiffNameTable[i];\n" +" }\n" +" }\n" +" return DiffString;\n" +"\n" +"}\n" +"function ShowFlashMessage(Message, FrameCount)\n" +"{\n" +" FlashMessage = Message;\n" +" FlashFrameCounter = FrameCount;\n" +"}\n" +"function OnPageReady()\n" +"{\n" +" var DumpDate = DumpUtcCaptureTime;\n" +" var CurrentDate = Date.now() / 1000;\n" +" var Diff = CurrentDate - DumpDate;\n" +" var Limit = 10*60;//flash old message when loading captures older than 10 minutes \n" +" if(Diff > Limit)\n" +" {\n" +" ShowFlashMessage(\"Captured \" + TimeString(Diff) + \" ago\", 100);\n" +" }\n" +" if(!nHideHelp)\n" +" {\n" +" ShowHelp(1,0);\n" +" }\n" +"}\n" +"\n" +"function DrawFlashMessage(context)\n" +"{\n" +" if(FlashFrameCounter > 0)\n" +" {\n" +" if(FlashFrameCounter>1)\n" +" {\n" +" var FlashPrc = Math.sin(FlashFrameCounter / FlashFrames);\n" +" context.font = FontFlash;\n" +" context.globalAlpha = FlashPrc * 0.35 + 0.5;\n" +" context.textAlign = \'center\';\n" +" context.fillStyle = \'red\';\n" +" context.fillText(FlashMessage, nWidth * 0.5, 50);\n" +" context.globalAlpha = 1;\n" +" context.textAlign = \'left\';\n" +" context.font = Font;\n" +" }\n" +" FlashFrameCounter -= 1;\n" +"\n" +" }\n" +"}\n" +"\n" +"function DrawCaptureInfo(context)\n" +"{\n" +" context.fillStyle = \'white\';\n" +" context.textAlign = \'right\';\n" +" context.font = Font;\n" +" var DumpDate = DumpUtcCaptureTime;\n" +" var CurrentDate = Date.now() / 1000;\n" +" var 
Diff = CurrentDate - DumpDate;\n" +" var DiffString = TimeString(Diff) + \" ago\";\n" +" context.fillText(new Date(DumpDate*1000).toLocaleString(), nWidth, FontHeight);\n" +" if(Mode == ModeTimers)\n" +" {\n" +" context.fillText(\"Timer Frames: \" + AggregateInfo.Frames, nWidth, FontHeight*2);\n" +" }\n" +" else\n" +" {\n" +" context.fillText(\"Detailed Frames \"+ Frames.length, nWidth, FontHeight*2);\n" +" }\n" +" context.fillText(DumpHost, nWidth, FontHeight*3);\n" +" context.fillText(DiffString, nWidth, FontHeight*4);\n" +" context.textAlign = \'left\';\n" +" DrawFlashMessage(context);\n" +"}\n" +"\n" +"function DrawDetailedFrameHistory()\n" +"{\n" +" ProfileEnter(\"DrawDetailedFrameHistory\");\n" +" var x = HistoryViewMouseX;\n" +"\n" +" var context = CanvasHistory.getContext(\'2d\');\n" +" context.clearRect(0, 0, CanvasHistory.width, CanvasHistory.height);\n" +"\n" +" var fHeight = nHistoryHeight;\n" +" var fWidth = nWidth / Frames.length;\n" +" var fHeightScale = fHeight / ReferenceTime;\n" +" var fX = 0;\n" +" var FrameIndex = -1;\n" +" var MouseDragging = MouseDragState != MouseDragOff;\n" +" fRangeBeginHistory = fRangeEndHistory = -1;\n" +" fRangeBeginHistoryGpu = fRangeEndHistoryGpu = -1;\n" +"\n" +" var FrameFirst = -1;\n" +" var FrameLast = nWidth;\n" +" var fDetailedOffsetEnd = fDetailedOffset + fDetailedRange;\n" +" for(i = 0; i < Frames.length; i++)\n" +" {\n" +" var fMs = Frames[i].frameend - Frames[i].framestart;\n" +" if(fDetailedOffset <= Frames[i].frameend && fDetailedOffset >= Frames[i].framestart)\n" +" {\n" +" var lerp = (fDetailedOffset - Frames[i].framestart) / (Frames[i].frameend - Frames[i].framestart);\n" +" FrameFirst = fX + fWidth * lerp;\n" +" }\n" +" if(fDetailedOffsetEnd <= Frames[i].frameend && fDetailedOffsetEnd >= Frames[i].framestart)\n" +" {\n" +" var lerp = (fDetailedOffsetEnd - Frames[i].framestart) / (Frames[i].frameend - Frames[i].framestart);\n" +" FrameLast = fX + fWidth * lerp;\n" +" }\n" +" var fH = fHeightScale * 
fMs;\n" +" var bMouse = x > fX && x < fX + fWidth;\n" +" if(bMouse && !MouseDragging)\n" +" {\n" +" context.fillStyle = FRAME_HISTORY_COLOR_GPU;\n" +" fRangeBeginHistory = Frames[i].framestart;\n" +" fRangeEndHistory = Frames[i].frameend;\n" +" if(Frames[i].framestartgpu)\n" +" {\n" +" fRangeBeginHistoryGpu = Frames[i].framestartgpu;\n" +" fRangeEndHistoryGpu = Frames[i].frameendgpu;\n" +" }\n" +" FrameIndex = i;\n" +" }\n" +" else\n" +" {\n" +" context.fillStyle = FRAME_HISTORY_COLOR_CPU;\n" +" }\n" +" context.fillRect(fX, fHeight - fH, fWidth-1, fH);\n" +" fX += fWidth;\n" +" }\n" +"\n" +" var fRangeHistoryBegin = FrameFirst;\n" +" var fRangeHistoryEnd = FrameLast;\n" +" var X = fRangeHistoryBegin;\n" +" var Y = 0;\n" +" var W = fRangeHistoryEnd - fRangeHistoryBegin;\n" +" context.globalAlpha = 0.35;\n" +" context.fillStyle = \'#009900\';\n" +" context.fillRect(X, Y, W, fHeight);\n" +" context.globalAlpha = 1;\n" +" context.strokeStyle = \'#00ff00\';\n" +" context.beginPath();\n" +" context.moveTo(X, Y);\n" +" context.lineTo(X, Y+fHeight);\n" +" context.moveTo(X+W, Y);\n" +" context.lineTo(X+W, Y+fHeight);\n" +" context.stroke();\n" +"\n" +"\n" +"\n" +"\n" +" DrawCaptureInfo(context);\n" +"\n" +" if(FrameIndex>=0 && !MouseDragging)\n" +" {\n" +" var StringArray = [];\n" +" StringArray.push(\"Frame\");\n" +" StringArray.push(\"\" + FrameIndex);\n" +" StringArray.push(\"Time\");\n" +" StringArray.push(\"\" + (Frames[FrameIndex].frameend - Frames[FrameIndex].framestart).toFixed(3));\n" +"\n" +" DrawToolTip(StringArray, CanvasHistory, HistoryViewMouseX, HistoryViewMouseY+20);\n" +"\n" +" }\n" +" ProfileLeave();\n" +"}\n" +"function TimeToMsString(Time)\n" +"{\n" +" return Time.toFixed(3) + \"ms\";\n" +"}\n" +"function TimeToString(Time)\n" +"{\n" +" if(Time > 1000)\n" +" {\n" +" return (Time/1000.0).toFixed(0) +\"s\";\n" +" }\n" +" else if(Time > 0.9)\n" +" {\n" +" return Time.toFixed(0) + \"ms\";\n" +" }\n" +" else if(Time > 0.0009)\n" +" {\n" +" return 
(Time*1000).toFixed(0) + \"us\";\n" +" }\n" +" else\n" +" {\n" +" return (Time*1000000).toFixed(0) + \"ns\";\n" +" }\n" +"}\n" +"\n" +"function DrawDetailedBackground(context)\n" +"{\n" +" var fMs = fDetailedRange;\n" +" var fMsEnd = fMs + fDetailedOffset;\n" +" var fMsToScreen = nWidth / fMs;\n" +" var fRate = Math.floor(2*((Math.log(fMs)/Math.log(10))-1))/2;\n" +" var fStep = Math.pow(10, fRate);\n" +" var fRcpStep = 1.0 / fStep;\n" +" var nColorIndex = Math.floor(fDetailedOffset * fRcpStep) % 2;\n" +" if(nColorIndex < 0)\n" +" nColorIndex = -nColorIndex;\n" +" var fStart = Math.floor(fDetailedOffset * fRcpStep) * fStep;\n" +" var fHeight = CanvasDetailedView.height;\n" +" var fScaleX = nWidth / fDetailedRange; \n" +" var HeaderString = TimeToString(fStep);\n" +" context.textAlign = \'center\';\n" +" for(f = fStart; f < fMsEnd; )\n" +" {\n" +" var fNext = f + fStep;\n" +" var X = (f - fDetailedOffset) * fScaleX;\n" +" var W = (fNext-f)*fScaleX;\n" +" context.fillStyle = nBackColors[nColorIndex];\n" +" context.fillRect(X, 0, W+2, fHeight);\n" +" nColorIndex = 1 - nColorIndex;\n" +" context.fillStyle = \'#777777\'\n" +" context.fillText(HeaderString, X + W * 0.5, 10);\n" +" context.fillText(HeaderString, X + W * 0.5, nHeight - 10);\n" +" f = fNext;\n" +" }\n" +" context.textAlign = \'left\';\n" +" var fScaleX = nWidth / fDetailedRange; \n" +" context.globalAlpha = 0.5;\n" +" context.strokeStyle = \'#bbbbbb\';\n" +" context.beginPath();\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" var frfr = Frames[i];\n" +" if(frfr.frameend < fDetailedOffset || frfr.framestart > fDetailedOffset + fDetailedRange)\n" +" {\n" +" continue;\n" +" }\n" +" var X = (frfr.framestart - fDetailedOffset) * fScaleX;\n" +" if(X >= 0 && X < nWidth)\n" +" {\n" +" context.moveTo(X, 0);\n" +" context.lineTo(X, nHeight);\n" +" }\n" +" }\n" +" context.stroke();\n" +" context.globalAlpha = 1;\n" +"\n" +"}\n" +"function DrawToolTip(StringArray, Canvas, x, y)\n" +"{\n" +" var context = 
Canvas.getContext(\'2d\');\n" +" context.font = Font;\n" +" var WidthArray = Array(StringArray.length);\n" +" var nMaxWidth = 0;\n" +" var nHeight = 0;\n" +" for(i = 0; i < StringArray.length; i += 2)\n" +" {\n" +" var nWidth0 = context.measureText(StringArray[i]).width;\n" +" var nWidth1 = context.measureText(StringArray[i+1]).width;\n" +" var nSum = nWidth0 + nWidth1;\n" +" WidthArray[i] = nWidth0;\n" +" WidthArray[i+1] = nWidth1;\n" +" if(nSum > nMaxWidth)\n" +" {\n" +" nMaxWidth = nSum;\n" +" }\n" +" nHeight += BoxHeight;\n" +" }\n" +" nMaxWidth += 15;\n" +" //bounds check.\n" +" var CanvasRect = Canvas.getBoundingClientRect();\n" +" if(y + nHeight > CanvasRect.height)\n" +" {\n" +" y = CanvasRect.height - nHeight;\n" +" x += 20;\n" +" }\n" +" if(x + nMaxWidth > CanvasRect.width)\n" +" {\n" +" x = CanvasRect.width - nMaxWidth;\n" +" }\n" +"\n" +" context.fillStyle = \'black\';\n" +" context.fillRect(x-1, y, nMaxWidth+2, nHeight);\n" +" context.fillStyle = \'white\';\n" +"\n" +" var XPos = x;\n" +" var XPosRight = x + nMaxWidth;\n" +" var YPos = y + BoxHeight-2;\n" +" for(i = 0; i < StringArray.length; i += 2)\n" +" {\n" +" context.fillText(StringArray[i], XPos, YPos);\n" +" context.fillText(StringArray[i+1], XPosRight - WidthArray[i+1], YPos);\n" +" YPos += BoxHeight;\n" +" }\n" +"}\n" +"function DrawHoverToolTip()\n" +"{\n" +" ProfileEnter(\"DrawHoverToolTip\");\n" +" if(nHoverToken != -1)\n" +" {\n" +" var StringArray = [];\n" +" var groupid = TimerInfo[nHoverToken].group;\n" +" StringArray.push(\"Timer\");\n" +" StringArray.push(TimerInfo[nHoverToken].name);\n" +" StringArray.push(\"Group\");\n" +" StringArray.push(GroupInfo[groupid].name);\n" +"\n" +" var bShowTimers = Mode == ModeTimers;\n" +" if(FlipToolTip)\n" +" {\n" +" bShowTimers = !bShowTimers;\n" +" }\n" +" if(bShowTimers)\n" +" {\n" +"\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" var Timer = TimerInfo[nHoverToken];\n" +" StringArray.push(\"Average\");\n" +" 
StringArray.push(Timer.average);\n" +" StringArray.push(\"Max\");\n" +" StringArray.push(Timer.max);\n" +" StringArray.push(\"Excl Max\");\n" +" StringArray.push(Timer.exclmax);\n" +" StringArray.push(\"Excl Average\");\n" +" StringArray.push(Timer.exclaverage);\n" +" StringArray.push(\"Call Average\");\n" +" StringArray.push(Timer.callaverage);\n" +" StringArray.push(\"Call Count\");\n" +" StringArray.push(Timer.callcount);\n" +"\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +"\n" +"\n" +" StringArray.push(\"Group\");\n" +" StringArray.push(GroupInfo[groupid].name);\n" +" StringArray.push(\"Average\");\n" +" StringArray.push(GroupInfo[groupid].average);\n" +" StringArray.push(\"Max\");\n" +" StringArray.push(GroupInfo[groupid].max);\n" +"\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +"\n" +" StringArray.push(\""; + +const size_t g_MicroProfileHtml_end_0_size = sizeof(g_MicroProfileHtml_end_0); +const char g_MicroProfileHtml_end_1[] = +"Timer Capture\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Frames\");\n" +" StringArray.push(AggregateInfo.Frames);\n" +" StringArray.push(\"Time\");\n" +" StringArray.push(AggregateInfo.Time.toFixed(2) + \"ms\");\n" +"\n" +"\n" +"\n" +"\n" +" }\n" +" else\n" +" {\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +"\n" +"\n" +"\n" +" StringArray.push(\"Time\");\n" +" StringArray.push((fRangeEnd-fRangeBegin).toFixed(3));\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Total\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].Sum);\n" +" StringArray.push(\"Max\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].Max);\n" +" StringArray.push(\"Average\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].Average);\n" +" StringArray.push(\"Count\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].CallCount);\n" +"\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +"\n" +" StringArray.push(\"Max/Frame\");\n" +" 
StringArray.push(\"\" + TimerInfo[nHoverToken].FrameMax);\n" +"\n" +" StringArray.push(\"Average Time/Frame\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].FrameAverage);\n" +"\n" +" StringArray.push(\"Average Count/Frame\");\n" +" StringArray.push(\"\" + TimerInfo[nHoverToken].FrameCallAverage);\n" +"\n" +"\n" +"\n" +"\n" +"\n" +" \n" +" if(nHoverFrame != -1)\n" +" {\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Frame \" + nHoverFrame);\n" +" StringArray.push(\"\");\n" +"\n" +" var FrameTime = new Object();\n" +" CalculateTimers(FrameTime, nHoverToken, nHoverFrame, nHoverFrame+1);\n" +" StringArray.push(\"Total\");\n" +" StringArray.push(\"\" + FrameTime.Sum);\n" +" StringArray.push(\"Count\");\n" +" StringArray.push(\"\" + FrameTime.CallCount);\n" +" StringArray.push(\"Average\");\n" +" StringArray.push(\"\" + FrameTime.Average);\n" +" StringArray.push(\"Max\");\n" +" StringArray.push(\"\" + FrameTime.Max);\n" +" }\n" +"\n" +" var HoverInfo = GatherHoverMetaCounters(nHoverToken, nHoverTokenIndex, nHoverTokenLogIndex, nHoverFrame);\n" +" var Header = 0;\n" +" for(index in HoverInfo)\n" +" {\n" +" if(0 == Header)\n" +" {\n" +" Header = 1;\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Meta\");\n" +" StringArray.push(\"\");\n" +"\n" +" }\n" +" StringArray.push(\"\"+index);\n" +" StringArray.push(\"\"+HoverInfo[index]);\n" +" }\n" +"\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +"\n" +" StringArray.push(\"Detailed Capture\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Frames\");\n" +" StringArray.push(Frames.length);\n" +" StringArray.push(\"Time\");\n" +" StringArray.push(DetailedTotal().toFixed(2) + \"ms\");\n" +"\n" +"\n" +" }\n" +" DrawToolTip(StringArray, CanvasDetailedView, DetailedViewMouseX, DetailedViewMouseY+20);\n" +" }\n" +" else if(nHoverCSCpu >= 0)\n" +" {\n" +" var StringArray = [];\n" +" StringArray.push(\"Context Switch\");\n" +" 
StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"\");\n" +" StringArray.push(\"Cpu\");\n" +" StringArray.push(\"\" + nHoverCSCpu);\n" +" StringArray.push(\"Begin\");\n" +" StringArray.push(\"\" + fRangeBegin);\n" +" StringArray.push(\"End\");\n" +" StringArray.push(\"\" + fRangeEnd);\n" +" DrawToolTip(StringArray, CanvasDetailedView, DetailedViewMouseX, DetailedViewMouseY+20);\n" +" }\n" +" ProfileLeave();\n" +"}\n" +"\n" +"function FormatMeta(Value, Dec)\n" +"{\n" +" if(!Value)\n" +" {\n" +" Value = \"0\";\n" +" }\n" +" else\n" +" {\n" +" Value = \'\' + Value.toFixed(Dec);\n" +" }\n" +" return Value;\n" +"}\n" +"\n" +"function DrawBarView()\n" +"{\n" +" ProfileEnter(\"DrawBarView\");\n" +" Invalidate++;\n" +" nHoverToken = -1;\n" +" nHoverFrame = -1;\n" +" var context = CanvasDetailedView.getContext(\'2d\');\n" +" context.clearRect(0, 0, nWidth, nHeight);\n" +"\n" +" var Height = BoxHeight;\n" +" var Width = nWidth;\n" +"\n" +" //clamp offset to prevent scrolling into the void\n" +" var nTotalRows = 0;\n" +" for(var groupid in GroupInfo)\n" +" {\n" +" if(GroupsAllActive || GroupsActive[GroupInfo[groupid].name])\n" +" {\n" +" nTotalRows += GroupInfo[groupid].TimerArray.length + 1;\n" +" }\n" +" }\n" +" var nTotalRowPixels = nTotalRows * Height;\n" +" var nFrameRows = nHeight - BoxHeight;\n" +" if(nOffsetBarsY + nFrameRows > nTotalRowPixels && nTotalRowPixels > nFrameRows)\n" +" {\n" +" nOffsetBarsY = nTotalRowPixels - nFrameRows;\n" +" }\n" +"\n" +"\n" +" var Y = -nOffsetBarsY + BoxHeight;\n" +" if(TimersGroups)\n" +" {\n" +" nOffsetBarsX = 0;\n" +" }\n" +" var XBase = -nOffsetBarsX;\n" +" var nColorIndex = 0;\n" +"\n" +" context.fillStyle = \'white\';\n" +" context.font = Font;\n" +" var bMouseIn = 0;\n" +" var RcpReferenceTime = 1.0 / ReferenceTime;\n" +" var CountWidth = 8 * FontWidth;\n" +" var nMetaLen = TimerInfo[0].meta.length;\n" +" var nMetaCharacters = 10;\n" +" for(var i = 0; i < nMetaLen; ++i)\n" +" {\n" +" 
if(nMetaCharacters < MetaNames[i].length)\n" +" nMetaCharacters = MetaNames[i].length;\n" +" }\n" +" var nWidthMeta = nMetaCharacters * FontWidth + 6;\n" +" function DrawHeaderSplit(Header)\n" +" {\n" +" context.fillStyle = \'white\';\n" +" context.fillText(Header, X, Height-FontAscent);\n" +" X += nWidthBars;\n" +" context.fillStyle = nBackColorOffset;\n" +" X += nWidthMs;\n" +" if(X >= NameWidth)\n" +" {\n" +" context.fillRect(X-3, 0, 1, nHeight);\n" +" }\n" +" }\n" +" function DrawHeaderSplitSingle(Header, Width)\n" +" {\n" +" context.fillStyle = \'white\';\n" +" context.fillText(Header, X, Height-FontAscent);\n" +" X += Width;\n" +" context.fillStyle = nBackColorOffset;\n" +" if(X >= NameWidth)\n" +" {\n" +" context.fillRect(X-3, 0, 1, nHeight);\n" +" }\n" +" }\n" +" function DrawHeaderSplitLeftRight(HeaderLeft, HeaderRight, Width)\n" +" {\n" +" context.textAlign = \'left\';\n" +" context.fillStyle = \'white\';\n" +" context.fillText(HeaderLeft, X, Height-FontAscent);\n" +" X += Width;\n" +" context.textAlign = \'right\';\n" +" context.fillText(HeaderRight, X-5, Height-FontAscent);\n" +" context.textAlign = \'left\';\n" +" context.fillStyle = nBackColorOffset;\n" +" if(X >= NameWidth)\n" +" {\n" +" context.fillRect(X-3, 0, 1, nHeight);\n" +" }\n" +" }\n" +" function DrawTimer(Value, Color)\n" +" {\n" +" var Prc = Value * RcpReferenceTime;\n" +" var YText = Y+Height-FontAscent;\n" +" if(Prc > 1)\n" +" {\n" +" Prc = 1;\n" +" }\n" +" context.fillStyle = Color;\n" +" context.fillRect(X+1, Y+1, Prc * nBarsWidth, InnerBoxHeight);\n" +" X += nWidthBars;\n" +" context.fillStyle = \'white\';\n" +" context.fillText((\" \" + Value.toFixed(2)).slice(-TimerLen), X, YText);\n" +" X += nWidthMs;\n" +" }\n" +" function DrawMeta(Value, Width, Dec)\n" +" {\n" +" Value = FormatMeta(Value, Dec);\n" +" X += (FontWidth*Width);\n" +" context.textAlign = \'right\';\n" +" context.fillText(Value, X-FontWidth, YText);\n" +" context.textAlign = \'left\';\n" +" }\n" +" var InnerBoxHeight = 
BoxHeight-2;\n" +" var TimerLen = 6;\n" +" var TimerWidth = TimerLen * FontWidth;\n" +" var nWidthBars = nBarsWidth+2;\n" +" var nWidthMs = TimerWidth+2+10;\n" +"\n" +"\n" +" if(2 == TimersGroups)\n" +" {\n" +" for(var i = 0; i < ThreadNames.length; ++i)\n" +" {\n" +" if(ThreadsActive[ThreadNames[i]] || ThreadsAllActive)\n" +" {\n" +" var X = 0;\n" +" var YText = Y+Height-FontAscent;\n" +" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" +" nColorIndex = 1-nColorIndex;\n" +" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, Width, Height);\n" +" var ThreadColor = CSwitchColors[i % CSwitchColors.length];\n" +" context.fillStyle = ThreadColor;\n" +" context.fillText(ThreadNames[i], 1, YText);\n" +" context.textAlign = \'left\';\n" +" Y += Height;\n" +" for(var idx in GroupOrder)\n" +" {\n" +" var groupid = GroupOrder[idx];\n" +" var Group = GroupInfo[groupid];\n" +" var PerThreadTimer = ThreadGroupTimeArray[i][groupid];\n" +" var PerThreadTimerTotal = ThreadGroupTimeTotalArray[i][groupid];\n" +" if((PerThreadTimer > 0.0001|| PerThreadTimerTotal>0.1) && (GroupsAllActive || GroupsActive[Group.name]))\n" +" {\n" +" var GColor = GroupColors ? GroupInfo[groupid].color : \'white\';\n" +" var X = 0;\n" +" nColorIndex = 1-nColorIndex;\n" +" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" +" context.fillStyle = bMouseIn ? 
nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, Width, nHeight);\n" +" context.fillStyle = GColor;\n" +" context.textAlign = \'right\';\n" +" context.fillText(Group.name, NameWidth - 5, Y+Height-FontAscent);\n" +" context.textAlign = \'left\';\n" +" X += NameWidth;\n" +" DrawTimer(PerThreadTimer, GColor);\n" +" X += nWidthBars + nWidthMs; \n" +" DrawTimer(PerThreadTimerTotal, GColor);\n" +"\n" +" Y += Height;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" }\n" +" else\n" +" {\n" +" for(var idx in GroupOrder)\n" +" {\n" +" var groupid = GroupOrder[idx];\n" +" var Group = GroupInfo[groupid];\n" +" var GColor = GroupColors ? GroupInfo[groupid].color : \'white\';\n" +" if(GroupsAllActive || GroupsActive[Group.name])\n" +" {\n" +" var TimerArray = Group.TimerArray;\n" +" var X = XBase;\n" +" nColorIndex = 1-nColorIndex;\n" +" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" +" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, Width, nHeight);\n" +" context.fillStyle = GColor;\n" +" context.fillText(Group.name, 1, Y+Height-FontAscent);\n" +" X += NameWidth;\n" +" DrawTimer(Group.average, GColor);\n" +" DrawTimer(Group.max, GColor);\n" +" DrawTimer(Group.total, GColor);\n" +"\n" +" context.fillStyle = bMouseIn ? 
nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, NameWidth, nHeight);\n" +" context.fillStyle = GColor;\n" +" context.fillText(Group.name, 1, Y+Height-FontAscent);\n" +"\n" +"\n" +"\n" +" Y += Height;\n" +" if(TimersGroups)\n" +" {\n" +" for(var i = 0; i < ThreadNames.length; ++i)\n" +" {\n" +" var PerThreadTimer = ThreadGroupTimeArray[i][groupid];\n" +" var PerThreadTimerTotal = ThreadGroupTimeTotalArray[i][groupid];\n" +" if((PerThreadTimer > 0.0001|| PerThreadTimerTotal>0.1) && (ThreadsActive[ThreadNames[i]] || ThreadsAllActive))\n" +" {\n" +" var YText = Y+Height-FontAscent;\n" +" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" +" nColorIndex = 1-nColorIndex;\n" +" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, Width, Height);\n" +" var ThreadColor = CSwitchColors[i % CSwitchColors.length];\n" +" context.fillStyle = ThreadColor;\n" +" context.textAlign = \'right\';\n" +" context.fillText(ThreadNames[i], NameWidth - 5, YText);\n" +" context.textAlign = \'left\';\n" +" X = NameWidth;\n" +" DrawTimer(PerThreadTimer, ThreadColor);\n" +" X += nWidthBars + nWidthMs; \n" +" DrawTimer(PerThreadTimerTotal, ThreadColor);\n" +" Y += Height;\n" +" }\n" +" }\n" +" }\n" +" else\n" +" {\n" +" for(var timerindex in TimerArray)\n" +" {\n" +" var timerid = TimerArray[timerindex];\n" +" var Timer = TimerInfo[timerid];\n" +" var Average = Timer.average;\n" +" var Max = Timer.max;\n" +" var ExclusiveMax = Timer.exclmax;\n" +" var ExclusiveAverage = Timer.exclaverage;\n" +" var CallAverage = Timer.callaverage;\n" +" var CallCount = Timer.callcount;\n" +" var YText = Y+Height-FontAscent;\n" +" X = NameWidth + XBase;\n" +"\n" +" nColorIndex = 1-nColorIndex;\n" +" bMouseIn = DetailedViewMouseY >= Y && DetailedViewMouseY < Y + BoxHeight;\n" +" if(bMouseIn)\n" +" {\n" +" nHoverToken = timerid;\n" +" }\n" +" context.fillStyle = bMouseIn ? 
nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, Width, Height);\n" +"\n" +" DrawTimer(Average, Timer.color);\n" +" DrawTimer(Max,Timer.color);\n" +" DrawTimer(Timer.total,Timer.color);\n" +" DrawTimer(CallAverage,Timer.color);\n" +" context.fillStyle = \'white\';\n" +" context.fillText(CallCount, X, YText);\n" +" X += CountWidth;\n" +" DrawTimer(ExclusiveAverage,Timer.color);\n" +" DrawTimer(ExclusiveMax,Timer.color);\n" +"\n" +" if(TimersMeta)\n" +" {\n" +" context.fillStyle = \'white\';\n" +" for(var j = 0; j < nMetaLen; ++j)\n" +" {\n" +" // var Len = MetaNames[j].length + 1;\n" +" DrawMeta(Timer.meta[j], MetaLengths[j], 0);\n" +" DrawMeta(Timer.metaavg[j], MetaLengthsAvg[j], 2);\n" +" DrawMeta(Timer.metamax[j], MetaLengthsMax[j], 0);\n" +" }\n" +" }\n" +" context.fillStyle = bMouseIn ? nBackColorOffset : nBackColors[nColorIndex];\n" +" context.fillRect(0, Y, NameWidth, Height);\n" +" context.textAlign = \'right\';\n" +" context.fillStyle = Timer.color;\n" +" context.fillText(Timer.name, NameWidth - 5, YText);\n" +" context.textAlign = \'left\';\n" +"\n" +"\n" +" Y += Height;\n" +" } \n" +" }\n" +" }\n" +" }\n" +" }\n" +" X = 0;\n" +" context.fillStyle = nBackColorOffset;\n" +" context.fillRect(0, 0, Width, Height);\n" +" context.fillStyle = \'white\';\n" +" if(TimersGroups)\n" +" {\n" +" if(2 == TimersGroups)\n" +" {\n" +" DrawHeaderSplitLeftRight(\'Thread\', \'Group\', NameWidth);\n" +" DrawHeaderSplit(\'Average\');\n" +" }\n" +" else\n" +" {\n" +" DrawHeaderSplitLeftRight(\'Group\', \'Thread\', NameWidth);\n" +" DrawHeaderSplit(\'Average\');\n" +" DrawHeaderSplit(\'Max\');\n" +" DrawHeaderSplit(\'Total\');\n" +" }\n" +" }\n" +" else\n" +" {\n" +" X = NameWidth + XBase;\n" +" DrawHeaderSplit(\'Average\');\n" +" DrawHeaderSplit(\'Max\');\n" +" DrawHeaderSplit(\'Total\');\n" +" DrawHeaderSplit(\'Call Average\');\n" +" DrawHeaderSplitSingle(\'Count\', CountWidth);\n" +" DrawHeaderSplit(\'Excl Average\');\n" +" DrawHeaderSplit(\'Excl 
Max\');\n" +" if(TimersMeta)\n" +" {\n" +" for(var i = 0; i < nMetaLen; ++i)\n" +" {\n" +" DrawHeaderSplitSingle(MetaNames[i], MetaLengths[i] * FontWidth);\n" +" DrawHeaderSplitSingle(MetaNames[i] + \" Avg\", MetaLengthsAvg[i] * FontWidth);\n" +" DrawHeaderSplitSingle(MetaNames[i] + \" Max\", MetaLengthsMax[i] * FontWidth);\n" +" }\n" +" }\n" +" X = 0;\n" +" context.fillStyle = nBackColorOffset;\n" +" context.fillRect(0, 0, NameWidth, Height);\n" +" context.fillStyle = \'white\';\n" +" \n" +" DrawHeaderSplitLeftRight(\'Group\', \'Timer\', NameWidth);\n" +" \n" +" }\n" +"\n" +" ProfileLeave();\n" +"}\n" +"\n" +"\n" +"//preprocess context switch data to contain array per thread\n" +"function PreprocessContextSwitchCacheItem(ThreadId)\n" +"{\n" +" console.log(\'context switch preparing \' + ThreadId);\n" +" var CSObject = CSwitchCache[ThreadId];\n" +" if(ThreadId > 0 && !CSObject)\n" +" {\n" +" CSArrayIn = new Array();\n" +" CSArrayOut = new Array();\n" +" CSArrayCpu = new Array();\n" +" var nCount = CSwitchTime.length;\n" +" var j = 0;\n" +" var TimeIn = -1.0;\n" +" for(var i = 0; i < nCount; ++i)\n" +" { \n" +" var ThreadIn = CSwitchThreadInOutCpu[j];\n" +" var ThreadOut = CSwitchThreadInOutCpu[j+1];\n" +" var Cpu = CSwitchThreadInOutCpu[j+2];\n" +" if(TimeIn < 0)\n" +" {\n" +" if(ThreadIn == ThreadId)\n" +" {\n" +" TimeIn = CSwitchTime[i];\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(ThreadOut == ThreadId)\n" +" {\n" +" var TimeOut = CSwitchTime[i];\n" +" CSArrayIn.push(TimeIn);\n" +" CSArrayOut.push(TimeOut);\n" +" CSArrayCpu.push(Cpu);\n" +" TimeIn = -1;\n" +" }\n" +" }\n" +" j += 3;\n" +" }\n" +" CSObject = new Object();\n" +" CSObject.Size = CSArrayIn.length;\n" +" CSObject.In = CSArrayIn;\n" +" CSObject.Out = CSArrayOut;\n" +" CSObject.Cpu = CSArrayCpu;\n" +" CSwitchCache[ThreadId] = CSObject;\n" +" }\n" +"\n" +"}\n" +"function PreprocessContextSwitchCache()\n" +"{\n" +" ProfileEnter(\"PreprocessContextSwitchCache\");\n" +" var AllThreads = {};\n" +" var nCount 
= CSwitchTime.length;\n" +" for(var i = 0; i < nCount; ++i)\n" +" { \n" +" var nThreadIn = CSwitchThreadInOutCpu[i];\n" +" if(!AllThreads[nThreadIn])\n" +" {\n" +" AllThreads[nThreadIn] = \'\' + nThreadIn;\n" +" var FoundThread = false;\n" +" for(var i = 0; i < ThreadIds.length; ++i)\n" +" {\n" +" if(ThreadIds[i] == nThreadIn)\n" +" {\n" +" FoundThread = true;\n" +" }\n" +" }\n" +" if(!FoundThread)\n" +" {\n" +" CSwitchOnlyThreads.push(nThreadIn);\n" +" }\n" +" }\n" +" }\n" +" for(var i = 0; i < CSwitchOnlyThreads.length; ++i)\n" +" {\n" +" PreprocessContextSwitchCacheItem(CSwitchOnlyThreads[i]);\n" +" }\n" +" for(var i = 0; i < ThreadIds.length; ++i)\n" +" {\n" +" PreprocessContextSwitchCacheItem(ThreadIds[i]); \n" +" }\n" +" ProfileLeave();\n" +"}\n" +"\n" +"function DrawContextSwitchBars(context, ThreadId, fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled)\n" +"{\n" +" ProfileEnter(\"DrawContextSwitchBars\");\n" +" var CSObject = CSwitchCache[ThreadId];\n" +" if(CSObject)\n" +" {\n" +" var Size = CSObject.Size; \n" +" var In = CSObject.In;\n" +" var Out = CSObject.Out;\n" +" var Cpu = CSObject.Cpu;\n" +" var nNumColors = CSwitchColors.length;\n" +" for(var i = 0; i < Size; ++i)\n" +" {\n" +" var TimeIn = In[i];\n" +" var TimeOut = Out[i];\n" +" var ActiveCpu = Cpu[i];\n" +"\n" +" var X = (TimeIn - fDetailedOffset) * fScaleX;\n" +" if(X > nWidth)\n" +" {\n" +" break;\n" +" }\n" +" var W = (TimeOut - TimeIn) * fScaleX;\n" +" if(W > MinWidth && X+W > 0)\n" +" {\n" +" if(nHoverCSCpu == ActiveCpu || bDrawEnabled)\n" +" {\n" +" if(nHoverCSCpu == ActiveCpu)\n" +" {\n" +" context.fillStyle = nHoverColor;\n" +" }\n" +" else\n" +" {\n" +" context.fillStyle = CSwitchColors[ActiveCpu % nNumColors];\n" +" }\n" +" context.fillRect(X, fOffsetY, W, CSwitchHeight);\n" +" }\n" +" if(DetailedViewMouseX >= X && DetailedViewMouseX <= X+W && DetailedViewMouseY < fOffsetY+CSwitchHeight && DetailedViewMouseY >= fOffsetY)\n" +" {\n" +" nHoverCSCpuNext = 
ActiveCpu;\n" +" fRangeBeginNext = TimeIn;\n" +" fRangeEndNext = TimeOut;\n" +" fRangeBeginGpuNext = fRangeEndGpuNext = -1;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" ProfileLeave();\n" +"}\n" +"\n" +"function DrawDetailedView(context, MinWidth, bDrawEnabled)\n" +"{\n" +" if(bDrawEnabled)\n" +" {\n" +" DrawDetailedBackground(context);\n" +" }\n" +"\n" +" var colors = [ \'#ff0000\', \'#ff00ff\', \'#ffff00\'];\n" +"\n" +" var fScaleX = nWidth / fDetailedRange; \n" +" var fOffsetY = -nOffsetY + BoxHeight;\n" +" nHoverTokenNext = -1;\n" +" nHoverTokenLogIndexNext = -1;\n" +" nHoverTokenIndexNext = -1;\n" +" nHoverCounter += nHoverCounterDelta;\n" +" if(nHoverCounter >= 255) \n" +" {\n" +" nHoverCounter = 255;\n" +" nHoverCounterDelta = -nHoverCounterDelta;\n" +" }\n" +" if(nHoverCounter < 128) \n" +" {\n" +" nHoverCounter = 128;\n" +" nHoverCounterDelta = -nHoverCounterDelta;\n" +" }\n" +" var nHoverHigh = nHoverCounter.toString(16);\n" +" var nHoverLow = (127+255-nHoverCounter).toString(16);\n" +" var nHoverColor = \'#\' + nHoverHigh + nHoverHigh + nHoverHigh;\n" +"\n" +" context.fillStyle = \'black\';\n" +" context.font = Font;\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var fTimeEnd = fDetailedOffset + fDetailedRange;\n" +"\n" +" var FirstFrame = 0;\n" +" for(var i = 0; i < Frames.length ; i++)\n" +" {\n" +" if(Frames[i].frameend < fDetailedOffset)\n" +" {\n" +" FirstFrame = i;\n" +" }\n" +" }\n" +" var nMinTimeMs = MinWidth / fScaleX;\n" +" {\n" +"\n" +" var Batches = new Array(TimerInfo.length);\n" +" var BatchesTxt = Array();\n" +" var BatchesTxtPos = Array();\n" +" var BatchesTxtColor = [\'#ffffff\', \'#333333\'];\n" +"\n" +" for(var i = 0; i < 2; ++i)\n" +" {\n" +" BatchesTxt[i] = Array();\n" +" BatchesTxtPos[i] = Array();\n" +" }\n" +" for(var i = 0; i < Batches.length; ++i)\n" +" {\n" +" Batches[i] = Array();\n" +" }\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var ThreadName = ThreadNames[nLog];\n" +" if(ThreadsAllActive || 
ThreadsActive[ThreadName])\n" +" {\n" +"\n" +" var LodIndex = 0;\n" +" var MinDelta = 0;\n" +" var NextLod = 1;\n" +" while(NextLod < LodData.length && LodData[NextLod].MinDelta[nLog] < nMinTimeMs)\n" +" {\n" +" LodIndex = NextLod;\n" +" NextLod = NextLod + 1;\n" +" MinDelta = LodData[LodIndex].MinDelta[nLog];\n" +" }\n" +" if(LodIndex == LodData.length)\n" +" {\n" +" LodIndex = LodData.length-1;\n" +" }\n" +" if(DisableLod)\n" +" {\n" +" LodIndex = 0;\n" +" }\n" +"\n" +" context.fillStyle = \'white\';\n" +" fOffsetY += BoxHeight;\n" +" context.fillText(ThreadName, 0, fOffsetY);\n" +" if(nContextSwitchEnabled)\n" +" {\n" +" DrawContextSwitchBars(context, ThreadIds[nLog], fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled);\n" +" fOffsetY += CSwitchHeight+1;\n" +" }\n" +" var MaxDepth = 1;\n" +" var StackPos = 0;\n" +" var Stack = Array(20);\n" +" var Lod = LodData[LodIndex];\n" +"\n" +" var TypeArray = Lod.TypeArray[nLog];\n" +" var IndexArray = Lod.IndexArray[nLog];\n" +" var TimeArray = Lod.TimeArray[nLog];\n" +"\n" +" var LocalFirstFrame = Frames[FirstFrame].FirstFrameIndex[nLog];\n" +" var IndexStart = Lod.LogStart[LocalFirstFrame][nLog];\n" +" var IndexEnd = TimeArray.length;\n" +" IndexEnd = TimeArray.length;\n" +" var HasSetHover = 0;\n" +"\n" +"\n" +" for(var j = IndexStart; j < IndexEnd; ++j)\n" +" {\n" +" var type = TypeArray[j];\n" +" var index = IndexArray[j];\n" +" var time = TimeArray[j];\n" +" if(type == 1)\n" +" {\n" +" //push\n" +" Stack[StackPos] = j;\n" +" StackPos++;\n" +" if(StackPos > MaxDepth)\n" +" {\n" +" MaxDepth = StackPos;\n" +" }\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +"\n" +" var StartIndex = Stack[StackPos];\n" +" var timestart = TimeArray[StartIndex];\n" +" var timeend = time;\n" +" var X = (timestart - fDetailedOffset) * fScaleX;\n" +" var Y = fOffsetY + StackPos * BoxHeight;\n" +" var W = (timeend-timestart)*fScaleX;\n" +"\n" +" if(W > MinWidth && X < nWidth 
&& X+W > 0)\n" +" {\n" +" if(bDrawEnabled || index == nHoverToken)\n" +" {\n" +" Batches[index].push(X);\n" +" Batches[index].push(Y);\n" +" Batches[index].push(W);\n" +" DebugDrawQuadCount++;\n" +"\n" +" var XText = X < 0 ? 0 : X;\n" +" var WText = W - (XText-X);\n" +" if(XText + WText > nWidth)\n" +" {\n" +" WText = nWidth - XText;\n" +" }\n" +" var Name = TimerInfo[index].name;\n" +" var NameLen = TimerInfo[index].len;\n" +" var BarTextLen = Math.floor((WText-2)/FontWidth);\n" +" var TimeText = TimeToMsString(timeend-timestart);\n" +" var TimeTextLen = TimeText.length;\n" +"\n" +" if(BarTextLen >= 2)\n" +" {\n" +" if(BarTextLen < NameLen)\n" +" Name = Name.substr(0, BarTextLen);\n" +" var txtidx = TimerInfo[index].textcolorindex;\n" +" var YPos = Y+BoxHeight-FontAscent;\n" +" BatchesTxt[txtidx].push(Name);\n" +" BatchesTxtPos[txtidx].push(XText+2);\n" +"\n" +" BatchesTxtPos[txtidx].push(YPos);\n" +" DebugDrawTextCount++;\n" +" if(BarTextLen - NameLen > TimeTextLen)\n" +" {\n" +" BatchesTxt[txtidx].push(TimeText);\n" +" BatchesTxtPos[txtidx].push(XText+WText-2 - TimeTextLen * FontWidth);\n" +" BatchesTxtPos[txtidx].push(YPos);\n" +" DebugDrawTextCount++;\n" +" }\n" +"\n" +" }\n" +" }\n" +"\n" +" if(DetailedViewMouseX >= X && DetailedViewMouseX <= X+W && DetailedViewMouseY < Y+BoxHeight && DetailedViewMouseY >= Y)\n" +" {\n" +" fRangeBeginNext = timestart;\n" +" fRangeEndNext = timeend;\n" +" if(TypeArray[StartIndex+1] == 3 && TypeArray[j+1] == 3)\n" +" {\n" +" fRangeBeginGpuNext = fRangeBeginNext;\n" +" fRangeEndGpuNext = fRangeEndNext;\n" +" //cpu tick is stored following\n" +" fRangeBeginNext = TimeArray[StartIndex+1];\n" +" fRangeEndNext = TimeArray[j+1];\n" +" }\n" +" else\n" +" {\n" +" fRangeBeginGpuNext = -1;\n" +" fRangeEndGpuNext = -1;\n" +" }\n" +"\n" +" nHoverTokenNext = index;\n" +" nHoverTokenIndexNext = j;\n" +" nHoverTokenLogIndexNext = nLog;\n" +" bHasSetHover = 1;\n" +" }\n" +" }\n" +" if(StackPos == 0 && time > fTimeEnd)\n" +" break; \n" +" }\n" 
+" }\n" +" }\n" +" fOffsetY += (1+g_MaxStack[nLog]) * BoxHeight;\n" +"\n" +" if(HasSetHover)\n" +" {\n" +" for(var i = 0; i < Frames.length-1; ++i)\n" +" {\n" +" var IndexStart = Lod.LogStart[i][nLog];\n" +" if(nHoverTokenNext >= IndexStart)\n" +" {\n" +" nHoverFrame = i;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" }\n" +"\n" +" if(nContextSwitchEnabled) //non instrumented threads.\n" +" {\n" +" var ContextSwitchThreads = CSwitchOnlyThreads;\n" +" for(var i = 0; i < ContextSwitchThreads.length; ++i)\n" +" {\n" +" var ThreadId = ContextSwitchThreads[i];\n" +" var ThreadName = \'\' + ThreadId;\n" +" DrawContextSwitchBars(context, ThreadId, fScaleX, fOffsetY, fDetailedOffset, nHoverColor, MinWidth, bDrawEnabled);\n" +" context.fillStyle = \'white\';\n" +" context.fillText(ThreadName, 0, fOffsetY+5);\n" +" fOffsetY += BoxHeight + 1;\n" +" }\n" +" }\n" +"\n" +"\n" +" {\n" +" for(var i = 0; i < Batches.length; ++i)\n" +" {\n" +" var a = Batches[i];\n" +" if(a.length)\n" +" {\n" +" context.fillStyle = TimerInfo[i].colordark;\n" +" if(!DisableMerge)\n" +" {\n" +" for(var j = 0; j < a.length; j += 3)\n" +" { \n" +" var X = a[j];\n" +" var Y = a[j+1];\n" +" var BaseWidth = j + 2;\n" +" var W = a[BaseWidth];\n" +" while(j+1 < a.length && W < 1)\n" +" {\n" +" var jnext = j+3;\n" +" var XNext = a[jnext];\n" +" var YNext = a[jnext+1];\n" +" var WNext = a[jnext+2];\n" +" var Delta = XNext - (X+W);\n" +" var YDelta = Math.abs(Y - YNext); \n" +" if(Delta < 0.3 && YDelta < 0.5 && WNext < 1)\n" +" {\n" +" W = (XNext+WNext) - X;\n" +" a[BaseWidth] = W;\n" +" a[jnext+2] = 0;\n" +" j += 3;\n" +" }\n" +" else\n" +" {\n" +" break;\n" +" }\n" +"\n" +" }\n" +" }\n" +" }\n" +" var off = 0.7;\n" +" var off2 = 2*off;\n" +" context.fillStyle = TimerInfo[i].colordark;\n" +" for(var j = 0; j < a.length; j += 3)\n" +" { \n" +" var X = a[j];\n" +" var Y = a[j+1];\n" +" var W = a[j+2];\n" +" if(W >= 1)\n" +" {\n" +" context.fillRect(X, Y, W, BoxHeight-1);\n" +" }\n" +" }\n" +" \n" +"\n" +" if(i == 
nHoverToken)\n" +" {\n" +" context.fillStyle = nHoverColor;\n" +" }\n" +" else\n" +" {\n" +" context.fillStyle = TimerInfo[i].color;\n" +" }\n" +" for(var j = 0; j < a.length; j += 3)\n" +" { \n" +" var X = a[j];\n" +" var Y = a[j+1];\n" +" var W = a[j+2];\n" +" if(W > 0)\n" +" {\n" +" context.fillRect(X+off, Y+off, W-off2, BoxHeight-1-off2);\n" +" }\n" +" }\n" +" }\n" +" } \n" +" }\n" +" for(var i = 0; i < BatchesTxt.length; ++i)\n" +" {\n" +" context.fillStyle = BatchesTxtColor[i];\n" +" var TxtArray = BatchesTxt[i];\n" +" var PosArray = BatchesTxtPos[i];\n" +" for(var j = 0; j < TxtArray.length; ++j)\n" +" {\n" +" var k = j * 2;\n" +" context.fillText(TxtArray[j], PosArray[k],PosArray[k+1]);\n" +" }\n" +" }\n" +"\n" +" }\n" +"}\n" +"function DrawTextBox(context, text, x, y, align)\n" +"{\n" +" var textsize = context.measureText(text).width;\n" +" var offsetx = 0;\n" +" var offsety = -FontHeight;\n" +" if(align == \'center\')\n" +" {\n" +" offsetx = -textsize / 2.0;\n" +" }\n" +" else if(align == \'right\')\n" +" {\n" +" offsetx = -textsize;\n" +" }\n" +" context.fillStyle = nBackColors[0];\n" +" context.fillRect(x + offsetx, y + offsety, textsize+2, FontHeight + 2);\n" +" context.fillStyle = \'white\';\n" +" context.fillText(text, x, y);\n" +"\n" +"}\n" +"function DrawRange(context, fBegin, fEnd, ColorBack, ColorFront, Offset, Name)\n" +"{\n" +" if(fBegin < fEnd)\n" +" {\n" +" var fScaleX = nWidth / fDetailedRange; \n" +" var X = (fBegin - fDetailedOffset) * fScaleX;\n" +" var YSpace = (FontHeight+2);\n" +" var Y = YSpace * (Offset);\n" +" var W = (fEnd - fBegin) * fScaleX;\n" +" context.globalAlpha = 0.1;\n" +" context.fillStyle = ColorBack;\n" +" context.fillRect(X, 0, W, nHeight);\n" +" context.globalAlpha = 1;\n" +" context.strokeStyle = ColorFront;\n" +" context.beginPath();\n" +" context.moveTo(X, 0);\n" +" context.lineTo(X, nHeight);\n" +" context.moveTo(X+W, 0);\n" +" context.lineTo(X+W, nHeight);\n" +" context.stroke();\n" +" var Duration = (fEnd - 
fBegin).toFixed(2) + \"ms\";\n" +" var Center = ((fBegin + fEnd) / 2.0) - fDetailedOffset;\n" +" var DurationWidth = context.measureText(Duration+ \" \").width;\n" +"\n" +" context.fillStyle = \'white\';\n" +" context.textAlign = \'right\';\n" +" var TextPosY = Y + YSpace;\n" +" DrawTextBox(context, \'\' + fBegin.toFixed(2), X-3, TextPosY, \'right\');\n" +" if(DurationWidth < W + 10)\n" +" {\n" +" context.textAlign = \'center\';\n" +" DrawTextBox(context,\'\' + Duration,Center * fScaleX, TextPosY, \'center\');\n" +"\n" +" var W0 = W - DurationWidth + FontWidth*1.5;\n" +" if(W0 > 6)\n" +" {\n" +" var Y0 = Y + FontHeight * 0.5;\n" +" W0 = W0 / 2.0;\n" +" var X0 = X + W0;\n" +" var X1 = X + W - W0;\n" +" context.strokeStyle = ColorFront;\n" +" context.beginPath();\n" +" context.moveTo(X, Y0);\n" +" context.lineTo(X0, Y0);\n" +" context.moveTo(X0, Y0-2);\n" +" context.lineTo(X0, Y0+2);\n" +" context.moveTo(X1, Y0-2);\n" +" context.lineTo(X1, Y0+2);\n" +" context.moveTo(X1, Y0);\n" +" context.lineTo(X + W, Y0);\n" +" context.stroke();\n" +" }\n" +" }\n" +" context.textAlign = \'left\';\n" +" DrawTextBox(context, \'\' + fEnd.toFixed(2), X + W + 2, TextPosY, \'left\');\n" +" DrawTextBox(context, Name, X + W + 2, nHeight - FontHeight - YSpace*Offset, \'left\');\n" +" Offset += 1;\n" +" }\n" +" return Offset;\n" +"}\n" +"\n" +"function DrawDetailed(Animation)\n" +"{\n" +" if(AnimationActive != Animation || !Initialized)\n" +" {\n" +" return;\n" +" }\n" +" ProfileEnter(\"DrawDetailed\");\n" +" DebugDrawQuadCount = 0;\n" +" DebugDrawTextCount = 0;\n" +" nHoverCSCpuNext = -1;\n" +"\n" +" fRangeBeginNext = fRangeEndNext = -1;\n" +" fRangeBeginGpuNext = fRangeEndGpuNext = -1;\n" +" var fRangeBeginGpu = -1;\n" +" var fRangeEndGpu = -1;\n" +"\n" +" var start = new Date();\n" +" nDrawCount++;\n" +"\n" +" var context = CanvasDetailedView.getContext(\'2d\');\n" +" var offscreen = CanvasDetailedOffscreen.getContext(\'2d\');\n" +" var fScaleX = nWidth / fDetailedRange; \n" +" var 
fOffsetY = -nOffsetY + BoxHeight;\n" +"\n" +" if(DetailedRedrawState.fOffsetY == fOffsetY && DetailedRedrawState.fDetailedOffset == fDetailedOffset && DetailedRedrawState.fDetailedRange == fDetailedRange && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton)\n" +" {\n" +" Invalidate++;\n" +" }\n" +" else\n" +" {\n" +" Invalidate = 0;\n" +" DetailedRedrawState.fOffsetY = fOffsetY;\n" +" DetailedRedrawState.fDetailedOffset = fDetailedOffset;\n" +" DetailedRedrawState.fDetailedRange = fDetailedRange;\n" +" }\n" +" if(Invalidate == 0) //when panning, only draw bars that are a certain width to keep decent framerate\n" +" {\n" +" context.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n" +" DrawDetailedView(context, nMinWidthPan, true);\n" +" }\n" +" else if(Invalidate == 1) //draw full and store\n" +" {\n" +" offscreen.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n" +" DrawDetailedView(offscreen, nMinWidth, true);\n" +" OffscreenData = offscreen.getImageData(0, 0, CanvasDetailedOffscreen.width, CanvasDetailedOffscreen.height);\n" +" }\n" +" else//reuse stored result untill next time viewport is changed.\n" +" {\n" +" context.clearRect(0, 0, CanvasDetailedView.width, CanvasDetailedView.height);\n" +" context.putImageData(OffscreenData, 0, 0);\n" +" DrawDetailedView(context, nMinWidth, false);\n" +" }\n" +"\n" +" if(KeyShiftDown || KeyCtrlDown || MouseDragButton || MouseDragSelectRange())\n" +" {\n" +" nHoverToken = -1;\n" +" nHoverTokenIndex = -1;\n" +" nHoverTokenLogIndex = -1;\n" +" fRangeBegin = fRangeEnd = -1;\n" +" }\n" +" else\n" +" {\n" +" nHoverToken = nHoverTokenNext;\n" +" nHoverTokenIndex = nHoverTokenIndexNext;\n" +" nHoverTokenLogIndex = nHoverTokenLogIndexNext;\n" +" if(fRangeBeginHistory < fRangeEndHistory)\n" +" {\n" +" fRangeBegin = fRangeBeginHistory;\n" +" fRangeEnd = fRangeEndHistory;\n" +" fRangeBeginGpu = fRangeBeginHistoryGpu;\n" +" fRangeEndGpu = fRangeEndHistoryGpu;\n" +" }\n" +" else\n" +" {\n" +" 
fRangeBegin = fRangeBeginNext;\n" +" fRangeEnd = fRangeEndNext;\n" +" fRangeBeginGpu = fRangeBeginGpuNext;\n" +" fRangeEndGpu = fRangeEndGpuNext;\n" +" }\n" +" }\n" +"\n" +" DrawTextBox(context, TimeToMsString(fDetailedOffset), 0, FontHeight, \'left\');\n" +" context.textAlign = \'right\';\n" +" DrawTextBox(context, TimeToMsString(fDetailedOffset + fDetailedRange), nWidth, FontHeight, \'right\');\n" +" context.textAlign = \'left\';\n" +"\n" +" var Offset = 0;\n" +" Offset = DrawRange(context, fRangeBeginSelect, fRangeEndSelect, \'#59d0ff\', \'#00ddff\', Offset, \"Selection\");\n" +" Offset = DrawRange(context, fRangeBegin, fRangeEnd, \'#009900\', \'#00ff00\', Offset, \"Cpu\");\n" +" Offset = DrawRange(context, fRangeBeginGpu, fRangeEndGpu, \'#996600\', \'#775500\', Offset, \"Gpu\");\n" +"\n" +" nHoverCSCpu = nHoverCSCpuNext;\n" +" ProfileLeave();\n" +"}\n" +"\n" +"function ZoomTo(fZoomBegin, fZoomEnd)\n" +"{\n" +" if(fZoomBegin < fZoomEnd)\n" +" {\n" +" AnimationActive = true;\n" +" var fDetailedOffsetOriginal = fDetailedOffset;\n" +" var fDetailedRangeOriginal = fDetailedRange;\n" +" var fDetailedOffsetTarget = fZoomBegin;\n" +" var fDetailedRangeTarget = fZoomEnd - fZoomBegin;\n" +" var TimestampStart = new Date();\n" +" var count = 0;\n" +" function ZoomFunc(Timestamp)\n" +" {\n" +" var fPrc = (new Date() - TimestampStart) / (ZOOM_TIME * 1000.0);\n" +" if(fPrc > 1.0)\n" +" {\n" +" fPrc = 1.0;\n" +" }\n" +" fPrc = Math.pow(fPrc, 0.3);\n" +" fDetailedOffset = fDetailedOffsetOriginal + (fDetailedOffsetTarget - fDetailedOffsetOriginal) * fPrc;\n" +" fDetailedRange = fDetailedRangeOriginal + (fDetailedRangeTarget - fDetailedRangeOriginal) * fPrc;\n" +" DrawDetailed(true);\n" +" if(fPrc >= 1.0)\n" +" {\n" +" AnimationActive = false;\n" +" fDetailedOffset = fDetailedOffsetTarget;\n" +" fDetailedRange = fDetailedRangeTarget;\n" +" }\n" +" else\n" +" {\n" +" requestAnimationFrame(ZoomFunc);\n" +" }\n" +" }\n" +" requestAnimationFrame(ZoomFunc);\n" +" }\n" +"}\n" 
+"function RequestRedraw()\n" +"{\n" +" Invalidate = 0;\n" +" Draw(1);\n" +"}\n" +"function Draw(RedrawMode)\n" +"{\n" +" if(ProfileMode)\n" +" {\n" +" ProfileModeClear();\n" +" ProfileEnter(\"Total\");\n" +" }\n" +" if(RedrawMode == 1)\n" +" {\n" +" if(Mode == ModeTimers)\n" +" {\n" +" DrawBarView();\n" +" DrawHoverToolTip();\n" +" }\n" +" else if(Mode == ModeDetailed)\n" +" {\n" +" DrawDetailed(false);\n" +" DrawHoverToolTip();\n" +" }\n" +" }\n" +" DrawDetailedFrameHistory();\n" +"\n" +" if(ProfileMode)\n" +" {\n" +" ProfileLeave();\n" +" ProfileModeDraw(CanvasDetailedView);\n" +" }\n" +"}\n" +"\n" +"function AutoRedraw(Timestamp)\n" +"{\n" +" var RedrawMode = 0;\n" +" if(Mode == ModeDetailed)\n" +" {\n" +" if(ProfileMode == 2 || ((nHoverCSCpu >= 0 || nHoverToken != -1) && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton)||(Invalidate<2 && !KeyCtrlDown && !KeyShiftDown && !MouseDragButton))\n" +" {\n" +" RedrawMode = 1;\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(Invalidate < 1)\n" +" {\n" +" RedrawMode = 1;\n" +" }\n" +" }\n" +" if(RedrawMode)\n" +" {\n" +" Draw(RedrawMode);\n" +" }\n" +" else if(FlashFrameCounter>0)\n" +" {\n" +" Draw(0);\n" +" }\n" +" requestAnimationFrame(AutoRedraw);\n" +"}\n" +"\n" +"\n" +"function ZoomGraph(nZoom)\n" +"{\n" +" var fOldRange = fDetailedRange;\n" +" if(nZoom>0)\n" +" {\n" +" fDetailedRange *= Math.pow(nModDown ? 1.40 : 1.03, nZoom);\n" +" }\n" +" else\n" +" {\n" +" var fNewDetailedRange = fDetailedRange / Math.pow((nModDown ? 
1.40 : 1.03), -nZoom);\n" +" if(fNewDetailedRange < 0.0001) //100ns\n" +" fNewDetailedRange = 0.0001;\n" +" fDetailedRange = fNewDetailedRange;\n" +" }\n" +"\n" +" var fDiff = fOldRange - fDetailedRange;\n" +" var fMousePrc = DetailedViewMouseX / nWidth;\n" +" if(fMousePrc < 0)\n" +" {\n" +" fMousePrc = 0;\n" +" }\n" +" fDetailedOffset += fDiff * fMousePrc;\n" +"\n" +"}\n" +"\n" +"function MeasureFont()\n" +"{\n" +" var context = CanvasDetailedView.getContext(\'2d\');\n" +" context.font = Font;\n" +" FontWidth = context.measureText(\'W\').width;\n" +"\n" +"}\n" +"function ResizeCanvas() \n" +"{\n" +" nWidth = window.innerWidth;\n" +" nHeight = window.innerHeight - CanvasHistory.height-2;\n" +" DPR = window.devicePixelRatio;\n" +"\n" +" if(DPR)\n" +" {\n" +" CanvasDetailedView.style.width = nWidth + \'px\'; \n" +" CanvasDetailedView.style.height = nHeight + \'px\';\n" +" CanvasDetailedView.width = nWidth * DPR;\n" +" CanvasDetailedView.height = nHeight * DPR;\n" +" CanvasHistory.style.width = window.innerWidth + \'px\';\n" +" CanvasHistory.style.height = 70 + \'px\';\n" +" CanvasHistory.width = window.innerWidth * DPR;\n" +" CanvasHistory.height = 70 * DPR;\n" +" CanvasHistory.getContext(\'2d\').scale(DPR,DPR);\n" +" CanvasDetailedView.getContext(\'2d\').scale(DPR,DPR);\n" +"\n" +" CanvasDetailedOffscreen.style.width = nWidth + \'px\';\n" +" CanvasDetailedOffscreen.style.height = nHeight + \'px\';\n" +" CanvasDetailedOffscreen.wid"; + +const size_t g_MicroProfileHtml_end_1_size = sizeof(g_MicroProfileHtml_end_1); +const char g_MicroProfileHtml_end_2[] = +"th = nWidth * DPR;\n" +" CanvasDetailedOffscreen.height = nHeight * DPR;\n" +" CanvasDetailedOffscreen.getContext(\'2d\').scale(DPR,DPR);\n" +"\n" +" }\n" +" else\n" +" {\n" +" DPR = 1;\n" +" CanvasDetailedView.width = nWidth;\n" +" CanvasDetailedView.height = nHeight;\n" +" CanvasDetailedOffscreen.width = nWidth;\n" +" CanvasDetailedOffscreen.height = nHeight;\n" +" CanvasHistory.width = window.innerWidth;\n" +" 
}\n" +" RequestRedraw();\n" +"}\n" +"\n" +"var MouseDragOff = 0;\n" +"var MouseDragDown = 1;\n" +"var MouseDragUp = 2;\n" +"var MouseDragMove = 3;\n" +"var MouseDragState = MouseDragOff;\n" +"var MouseDragTarget = 0;\n" +"var MouseDragButton = 0;\n" +"var MouseDragKeyShift = 0;\n" +"var MouseDragKeyCtrl = 0;\n" +"var MouseDragX = 0;\n" +"var MouseDragY = 0;\n" +"var MouseDragXLast = 0;\n" +"var MouseDragYLast = 0;\n" +"var MouseDragXStart = 0;\n" +"var MouseDragYStart = 0;\n" +"\n" +"function clamp(number, min, max)\n" +"{\n" +" return Math.max(min, Math.min(number, max));\n" +"}\n" +"\n" +"function MouseDragPan()\n" +"{\n" +" return MouseDragButton == 1 || MouseDragKeyShift;\n" +"}\n" +"function MouseDragSelectRange()\n" +"{\n" +" return MouseDragState == MouseDragMove && (MouseDragButton == 3 || (MouseDragKeyShift && MouseDragKeyCtrl));\n" +"}\n" +"function MouseHandleDrag()\n" +"{\n" +" if(MouseDragTarget == CanvasDetailedView)\n" +" {\n" +" if(Mode == ModeDetailed)\n" +" {\n" +" if(MouseDragSelectRange())\n" +" {\n" +" var xStart = MouseDragXStart;\n" +" var xEnd = MouseDragX;\n" +" if(xStart > xEnd)\n" +" {\n" +" var Temp = xStart;\n" +" xStart = xEnd;\n" +" xEnd = Temp;\n" +" }\n" +" if(xEnd - xStart > 1)\n" +" {\n" +" fRangeBegin = fDetailedOffset + fDetailedRange * (xStart / nWidth);\n" +" fRangeEnd = fDetailedOffset + fDetailedRange * (xEnd / nWidth);\n" +" fRangeBeginSelect = fDetailedOffset + fDetailedRange * (xStart / nWidth);\n" +" fRangeEndSelect = fDetailedOffset + fDetailedRange * (xEnd / nWidth);\n" +" }\n" +" }\n" +" else if(MouseDragPan())\n" +" {\n" +" var X = MouseDragX - MouseDragXLast;\n" +" var Y = MouseDragY - MouseDragYLast;\n" +" if(X)\n" +" {\n" +" fDetailedOffset += -X * fDetailedRange / nWidth;\n" +" }\n" +" nOffsetY -= Y;\n" +" if(nOffsetY < 0)\n" +" {\n" +" nOffsetY = 0;\n" +" }\n" +" }\n" +" else if(MouseDragKeyCtrl)\n" +" {\n" +" if(MouseDragY != MouseDragYLast)\n" +" {\n" +" ZoomGraph(MouseDragY - MouseDragYLast);\n" +" }\n" +" 
}\n" +" }\n" +" else if(Mode == ModeTimers)\n" +" {\n" +" if(MouseDragKeyShift || MouseDragButton == 1)\n" +" {\n" +" var X = MouseDragX - MouseDragXLast;\n" +" var Y = MouseDragY - MouseDragYLast;\n" +" nOffsetBarsY -= Y;\n" +" nOffsetBarsX -= X;\n" +" if(nOffsetBarsY < 0)\n" +" {\n" +" nOffsetBarsY = 0;\n" +" }\n" +" if(nOffsetBarsX < 0)\n" +" {\n" +" nOffsetBarsX = 0;\n" +" }\n" +" }\n" +"\n" +" }\n" +"\n" +" }\n" +" else if(MouseDragTarget == CanvasHistory)\n" +" {\n" +" function HistoryFrameTime(x)\n" +" {\n" +" var NumFrames = Frames.length;\n" +" var fBarWidth = nWidth / NumFrames;\n" +" var Index = clamp(Math.floor(NumFrames * x / nWidth), 0, NumFrames-1);\n" +" var Lerp = clamp((x/fBarWidth - Index) , 0, 1);\n" +" var time = Frames[Index].framestart + (Frames[Index].frameend - Frames[Index].framestart) * Lerp;\n" +" return time;\n" +" }\n" +" if(MouseDragSelectRange())\n" +" {\n" +" fRangeBegin = fRangeEnd = -1;\n" +"\n" +" var xStart = MouseDragXStart;\n" +" var xEnd = MouseDragX;\n" +" if(xStart > xEnd)\n" +" {\n" +" var Temp = xStart;\n" +" xStart = xEnd;\n" +" xEnd = Temp;\n" +" }\n" +" if(xEnd - xStart > 2)\n" +" {\n" +" var timestart = HistoryFrameTime(xStart);\n" +" var timeend = HistoryFrameTime(xEnd);\n" +" fDetailedOffset = timestart;\n" +" fDetailedRange = timeend-timestart;\n" +" }\n" +" }\n" +" else if(MouseDragPan())\n" +" {\n" +" var Time = HistoryFrameTime(MouseDragX);\n" +" fDetailedOffset = Time - fDetailedRange / 2.0;\n" +" }\n" +" }\n" +"}\n" +"function MouseHandleDragEnd()\n" +"{\n" +" if(MouseDragTarget == CanvasDetailedView)\n" +" {\n" +"\n" +" }\n" +" else if(MouseDragTarget == CanvasHistory)\n" +" {\n" +" if(!MouseDragSelectRange() && !MouseDragPan())\n" +" {\n" +" ZoomTo(fRangeBegin, fRangeEnd);\n" +" fRangeBegin = fRangeEnd = -1;\n" +" }\n" +"\n" +"\n" +" }\n" +"\n" +"}\n" +"\n" +"function MouseHandleDragClick()\n" +"{\n" +" if(MouseDragTarget == CanvasDetailedView)\n" +" {\n" +" ZoomTo(fRangeBegin, fRangeEnd);\n" +" }\n" +" else 
if(MouseDragTarget == CanvasHistory)\n" +" {\n" +" if(Mode == ModeDetailed)\n" +" {\n" +" ZoomTo(fRangeBegin, fRangeEnd);\n" +" }\n" +" }\n" +"}\n" +"\n" /* Embedded JS MapMouseButton(Event): maps a mouse event to the drag button id compared against MouseDragButton (returns 1, 3, or 0 for anything else). It must read its Event argument; the implicit global 'event' is not defined in every browser. */ +"function MapMouseButton(Event)\n" +"{\n" +" if(Event.button == 1 || Event.which == 1)\n" +" {\n" +" return 1;\n" +" }\n" +" else if(Event.button == 3 || Event.which == 3)\n" +" {\n" +" return 3;\n" +" }\n" +" else\n" +" {\n" +" return 0;\n" +" }\n" +"}\n" +"\n" +"function MouseDragReset()\n" +"{\n" +" MouseDragState = MouseDragOff;\n" +" MouseDragTarget = 0;\n" +" MouseDragKeyShift = 0;\n" +" MouseDragKeyCtrl = 0;\n" +" MouseDragButton = 0;\n" +"}\n" +"function MouseDragKeyUp()\n" +"{\n" +" if((MouseDragKeyShift && !KeyShiftDown) || (MouseDragKeyCtrl && !KeyCtrlDown))\n" +" {\n" +" MouseHandleDragEnd();\n" +" MouseDragReset();\n" +" }\n" +"}\n" +"function MouseDrag(Source, Event)\n" +"{\n" +" if(Source == MouseDragOff || (MouseDragTarget && MouseDragTarget != Event.target))\n" +" {\n" +" MouseDragReset();\n" +" return;\n" +" }\n" +"\n" +" var LocalRect = Event.target.getBoundingClientRect();\n" +" MouseDragX = Event.clientX - LocalRect.left;\n" +" MouseDragY = Event.clientY - LocalRect.top;\n" +" // console.log(\'cur drag state \' + MouseDragState + \' Source \' + Source);\n" +" if(MouseDragState == MouseDragMove)\n" +" {\n" +" var dx = Math.abs(MouseDragX - MouseDragXStart);\n" +" var dy = Math.abs(MouseDragY - MouseDragYStart);\n" +" if((Source == MouseDragUp && MapMouseButton(Event) == MouseDragButton) ||\n" +" (MouseDragKeyCtrl && !KeyCtrlDown) ||\n" +" (MouseDragKeyShift && !KeyShiftDown))\n" +" {\n" +" MouseHandleDragEnd();\n" +" MouseDragReset();\n" +" return;\n" +" }\n" +" else\n" +" {\n" +" MouseHandleDrag();\n" +" }\n" +" }\n" +" else if(MouseDragState == MouseDragOff)\n" +" {\n" +" if(Source == MouseDragDown || KeyShiftDown || KeyCtrlDown)\n" +" {\n" +" MouseDragTarget = Event.target;\n" +" MouseDragButton = MapMouseButton(Event);\n" +" MouseDragState = MouseDragDown;\n" +" MouseDragXStart = 
MouseDragX;\n" +" MouseDragYStart = MouseDragY;\n" +" MouseDragKeyCtrl = 0;\n" +" MouseDragKeyShift = 0;\n" +"\n" +" if(KeyShiftDown || KeyCtrlDown)\n" +" {\n" +" MouseDragKeyShift = KeyShiftDown;\n" +" MouseDragKeyCtrl = KeyCtrlDown;\n" +" MouseDragState = MouseDragMove;\n" +" }\n" +" }\n" +" }\n" +" else if(MouseDragState == MouseDragDown)\n" +" {\n" +" if(Source == MouseDragUp)\n" +" {\n" +" MouseHandleDragClick();\n" +" MouseDragReset();\n" +" }\n" +" else if(Source == MouseDragMove)\n" +" {\n" +" var dx = Math.abs(MouseDragX - MouseDragXStart);\n" +" var dy = Math.abs(MouseDragY - MouseDragYStart);\n" +" if(dx+dy>1)\n" +" {\n" +" MouseDragState = MouseDragMove;\n" +" }\n" +" }\n" +" }\n" +" MouseDragXLast = MouseDragX;\n" +" MouseDragYLast = MouseDragY;\n" +"}\n" +"\n" /* Embedded JS MouseMove(evt): forwards the move to MouseDrag, stores the cursor position (relative to the event target) for whichever canvas raised the event, then redraws. The CanvasHistory branch compares with == ; a single = would assign to evt.target and make the branch unconditionally true. */ +"function MouseMove(evt)\n" +"{\n" +" evt.preventDefault();\n" +" MouseDrag(MouseDragMove, evt);\n" +" MouseHistory = 0;\n" +" MouseDetailed = 0;\n" +" HistoryViewMouseX = HistoryViewMouseY = -1;\n" +" var rect = evt.target.getBoundingClientRect();\n" +" var x = evt.clientX - rect.left;\n" +" var y = evt.clientY - rect.top;\n" +" if(evt.target == CanvasDetailedView)\n" +" {\n" +" if(!MouseDragSelectRange())\n" +" {\n" +" fRangeBegin = fRangeEnd = -1;\n" +" }\n" +" DetailedViewMouseX = x;\n" +" DetailedViewMouseY = y;\n" +" }\n" +" else if(evt.target == CanvasHistory)\n" +" {\n" +" var Rect = CanvasHistory.getBoundingClientRect();\n" +" HistoryViewMouseX = x;\n" +" HistoryViewMouseY = y;\n" +" }\n" +" Draw(1);\n" +"}\n" +"\n" +"function MouseButton(bPressed, evt)\n" +"{\n" +" evt.preventDefault();\n" +" MouseDrag(bPressed ? 
MouseDragDown : MouseDragUp, evt);\n" +"}\n" +"\n" +"function MouseOut(evt)\n" +"{\n" +" MouseDrag(MouseDragOff, evt);\n" +" KeyCtrlDown = 0;\n" +" KeyShiftDown = 0;\n" +" MouseDragButton = 0;\n" +" nHoverToken = -1;\n" +" fRangeBegin = fRangeEnd = -1;\n" +"}\n" +"\n" +"function MouseWheel(e)\n" +"{\n" +" var e = window.event || e;\n" +" var delta = (e.wheelDelta || e.detail * (-120));\n" +" ZoomGraph((-4 * delta / 120.0) | 0);\n" +" Draw(1);\n" +"}\n" +"\n" +"\n" +"function KeyUp(evt)\n" +"{\n" +" if(evt.keyCode == 17)\n" +" {\n" +" KeyCtrlDown = 0;\n" +" MouseDragKeyUp();\n" +" }\n" +" else if(evt.keyCode == 16)\n" +" {\n" +" KeyShiftDown = 0;\n" +" MouseDragKeyUp();\n" +" }\n" +" if(evt.keyCode == 18)\n" +" {\n" +" FlipToolTip = 0;\n" +" }\n" +" if(evt.keyCode == 32)\n" +" {\n" +" if(fRangeBeginSelect < fRangeEndSelect)\n" +" {\n" +" ZoomTo(fRangeBeginSelect, fRangeEndSelect);\n" +" fRangeBeginSelect = fRangeEndSelect = -1;\n" +" MouseHandleDragEnd();\n" +" }\n" +" }\n" +" if(evt.keyCode == 27)\n" +" {\n" +" fRangeBeginSelect = fRangeEndSelect = -1; \n" +" }\n" +" Invalidate = 0;\n" +"}\n" +"\n" +"function KeyDown(evt)\n" +"{\n" +" if(evt.keyCode == 18)\n" +" {\n" +" FlipToolTip = 1;\n" +" }\n" +" if(evt.keyCode == 17)\n" +" {\n" +" KeyCtrlDown = 1;\n" +" }\n" +" else if(evt.keyCode == 16)\n" +" {\n" +" KeyShiftDown = 1;\n" +" }\n" +" Invalidate = 0;\n" +"}\n" +"\n" +"function ReadCookie()\n" +"{\n" +" var result = document.cookie.match(/fisk=([^;]+)/);\n" +" var NewMode = ModeDetailed;\n" +" var ReferenceTimeString = \'33ms\';\n" +" if(result && result.length > 0)\n" +" {\n" +" var Obj = JSON.parse(result[1]);\n" +" if(Obj.Mode)\n" +" {\n" +" NewMode = Obj.Mode;\n" +" }\n" +" if(Obj.ReferenceTime)\n" +" {\n" +" ReferenceTimeString = Obj.ReferenceTime;\n" +" }\n" +" if(Obj.ThreadsAllActive || Obj.ThreadsAllActive == 0 || Obj.ThreadsAllActive == false)\n" +" {\n" +" ThreadsAllActive = Obj.ThreadsAllActive;\n" +" }\n" +" else\n" +" {\n" +" ThreadsAllActive = 1;\n" 
+" }\n" +" if(Obj.ThreadsActive)\n" +" {\n" +" ThreadsActive = Obj.ThreadsActive;\n" +" }\n" +" if(Obj.GroupsAllActive || Obj.GroupsAllActive == 0 || Obj.GroupsAllActive)\n" +" {\n" +" GroupsAllActive = Obj.GroupsAllActive;\n" +" }\n" +" else\n" +" {\n" +" GroupsAllActive = 1;\n" +" }\n" +" if(Obj.GroupsActive)\n" +" {\n" +" GroupsActive = Obj.GroupsActive;\n" +" }\n" +" if(Obj.nContextSwitchEnabled)\n" +" {\n" +" nContextSwitchEnabled = Obj.nContextSwitchEnabled; \n" +" }\n" +" else\n" +" {\n" +" nContextSwitchEnabled = 1;\n" +" }\n" +" if(Obj.GroupColors)\n" +" {\n" +" GroupColors = Obj.GroupColors;\n" +" }\n" +" else\n" +" {\n" +" GroupColors = 0;\n" +" }\n" +" if(Obj.nHideHelp)\n" +" {\n" +" nHideHelp = 1;\n" +" }\n" +" TimersGroups = Obj.TimersGroups?Obj.TimersGroups:0;\n" +" TimersMeta = Obj.TimersMeta?0:1;\n" +" }\n" +" SetContextSwitch(nContextSwitchEnabled);\n" +" SetMode(NewMode, TimersGroups);\n" +" SetReferenceTime(ReferenceTimeString);\n" +" UpdateOptionsMenu();\n" +" UpdateGroupColors();\n" +"}\n" +"function WriteCookie()\n" +"{\n" +" var Obj = new Object();\n" +" Obj.Mode = Mode;\n" +" Obj.ReferenceTime = ReferenceTime + \'ms\';\n" +" Obj.ThreadsActive = ThreadsActive;\n" +" Obj.ThreadsAllActive = ThreadsAllActive;\n" +" Obj.GroupsActive = GroupsActive;\n" +" Obj.GroupsAllActive = GroupsAllActive;\n" +" Obj.nContextSwitchEnabled = nContextSwitchEnabled;\n" +" Obj.TimersGroups = TimersGroups?TimersGroups:0;\n" +" Obj.TimersMeta = TimersMeta?0:1;\n" +" Obj.GroupColors = GroupColors;\n" +" if(nHideHelp)\n" +" {\n" +" Obj.nHideHelp = 1;\n" +" }\n" +" var date = new Date();\n" +" date.setFullYear(2099);\n" +" var cookie = \'fisk=\' + JSON.stringify(Obj) + \';expires=\' + date;\n" +" document.cookie = cookie;\n" +"}\n" +"\n" +"var mousewheelevt = (/Firefox/i.test(navigator.userAgent)) ? 
\"DOMMouseScroll\" : \"mousewheel\" //FF doesn\'t recognize mousewheel as of FF3.x\n" +"\n" +"CanvasDetailedView.addEventListener(\'mousemove\', MouseMove, false);\n" +"CanvasDetailedView.addEventListener(\'mousedown\', function(evt) { MouseButton(true, evt); });\n" +"CanvasDetailedView.addEventListener(\'mouseup\', function(evt) { MouseButton(false, evt); } );\n" +"CanvasDetailedView.addEventListener(\'mouseout\', MouseOut);\n" +"CanvasDetailedView.addEventListener(\"contextmenu\", function (e) { e.preventDefault(); }, false);\n" +"CanvasDetailedView.addEventListener(mousewheelevt, MouseWheel, false);\n" +"CanvasHistory.addEventListener(\'mousemove\', MouseMove);\n" +"CanvasHistory.addEventListener(\'mousedown\', function(evt) { MouseButton(true, evt); });\n" +"CanvasHistory.addEventListener(\'mouseup\', function(evt) { MouseButton(false, evt); } );\n" +"CanvasHistory.addEventListener(\'mouseout\', MouseOut);\n" +"CanvasHistory.addEventListener(\"contextmenu\", function (e) { e.preventDefault(); }, false);\n" +"CanvasHistory.addEventListener(mousewheelevt, MouseWheel, false);\n" +"window.addEventListener(\'keydown\', KeyDown);\n" +"window.addEventListener(\'keyup\', KeyUp);\n" +"window.addEventListener(\'resize\', ResizeCanvas, false);\n" +"\n" +"function CalcAverage()\n" +"{\n" +" var Sum = 0;\n" +" var Count = 0;\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" StackPos = 0;\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" var Frame_ = Frames[i]; \n" +" var tt = Frame_.tt[nLog];\n" +" var ts = Frame_.ts[nLog];\n" +"\n" +" var count = tt.length;\n" +" for(var j = 0; j < count; j++)\n" +" {\n" +" var type = tt[j];\n" +" var time = ts[j];\n" +" if(type == 1)\n" +" {\n" +" Stack[StackPos] = time;//store the frame which it comes from\n" +" StackPos++;\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +"\n" +" StackPos--;\n" +" var localtime = time - Stack[StackPos];\n" +" Count++;\n" +" Sum += localtime;\n" +" }\n" +" }\n" 
+" }\n" +" }\n" +" }\n" +" return Sum / Count;\n" +"\n" +"}\n" +"\n" +"function MakeLod(index, MinDelta, TimeArray, TypeArray, IndexArray, LogStart)\n" +"{\n" +" if(LodData[index])\n" +" {\n" +" console.log(\"error!!\");\n" +" }\n" +" // debugger;\n" +" var o = new Object();\n" +" o.MinDelta = MinDelta;\n" +" o.TimeArray = TimeArray;\n" +" o.TypeArray = TypeArray;\n" +" o.IndexArray = IndexArray;\n" +" o.LogStart = LogStart;\n" +" LodData[index] = o;\n" +"}\n" +"\n" +"function PreprocessBuildSplitArray()\n" +"{\n" +" var nNumLogs = Frames[0].ts.length;\n" +"\n" +" ProfileEnter(\"PreprocessBuildSplitArray\");\n" +" var SplitArrays = new Array(nNumLogs);\n" +"\n" +" for(nLog = 0; nLog < nNumLogs; ++nLog)\n" +" {\n" +" console.log(\"source log \" + nLog + \" size \" + LodData[0].TypeArray[nLog].length);\n" +" }\n" +"\n" +"\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var MaxDepth = 1;\n" +" var StackPos = 0;\n" +" var Stack = Array(20);\n" +" var TypeArray = LodData[0].TypeArray[nLog];\n" +" var TimeArray = LodData[0].TimeArray[nLog];\n" +" var DeltaTimes = new Array(TypeArray.length);\n" +"\n" +" for(var j = 0; j < TypeArray.length; ++j)\n" +" {\n" +" var type = TypeArray[j];\n" +" var time = TimeArray[j];\n" +" if(type == 1)\n" +" {\n" +" //push\n" +" Stack[StackPos] = time;\n" +" StackPos++;\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +" DeltaTimes[j] = time - Stack[StackPos];\n" +" }\n" +" else\n" +" {\n" +" DeltaTimes[j] = 0;\n" +" }\n" +" }\n" +" }\n" +" DeltaTimes.sort(function(a,b){return b-a;});\n" +" var SplitArray = Array(NumLodSplits);\n" +" var SplitIndex = DeltaTimes.length;\n" +"\n" +" var j = 0;\n" +" for(j = 0; j < NumLodSplits; ++j)\n" +" {\n" +" SplitIndex = Math.floor(SplitIndex / 2);\n" +" while(SplitIndex > 0 && !DeltaTimes[SplitIndex])\n" +" {\n" +" SplitIndex--;\n" +" }\n" +" if(SplitIndex < SplitMin)\n" +" {\n" +" break;\n" +" }\n" +" //search.. 
if 0\n" +" var SplitTime = DeltaTimes[SplitIndex];\n" +" if(SplitTime>=0)\n" +" {\n" +" SplitArray[j] = SplitTime;\n" +" }\n" +" else\n" +" {\n" +" SplitArray[j] = SPLIT_LIMIT;\n" +" }\n" +" if(j>0)\n" +" {\n" +" console.assert(SplitArray[j-1] <= SplitArray[j], \"must be less\");\n" +" }\n" +"\n" +" }\n" +" for(; j < NumLodSplits; ++j)\n" +" {\n" +" SplitArray[j] = SPLIT_LIMIT;\n" +" // console.log(\"split skipping \" + j + \" \" + SPLIT_LIMIT);\n" +" }\n" +"\n" +"\n" +" SplitArrays[nLog] = SplitArray;\n" +" }\n" +" ProfileLeave();\n" +" return SplitArrays;\n" +"}\n" +"\n" +"function PreprocessBuildDurationArray()\n" +"{\n" +" var nNumLogs = Frames[0].ts.length;\n" +" ProfileEnter(\"PreprocessBuildDurationArray\");\n" +" var DurationArrays = new Array(nNumLogs);\n" +" for(nLog = 0; nLog < nNumLogs; ++nLog)\n" +" {\n" +" var MaxDepth = 1;\n" +" var StackPos = 0;\n" +" var Stack = Array(20);\n" +" var StackIndex = Array(20);\n" +" var TypeArray = LodData[0].TypeArray[nLog];\n" +" var TimeArray = LodData[0].TimeArray[nLog];\n" +" var DurationArray = Array(LodData[0].TypeArray[nLog].length);\n" +" for(var j = 0; j < TypeArray.length; ++j)\n" +" {\n" +" var type = TypeArray[j];\n" +" var time = TimeArray[j];\n" +" if(type == 1)\n" +" {\n" +" //push\n" +" Stack[StackPos] = time;\n" +" StackIndex[StackPos] = j;\n" +" StackPos++;\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +" var Duration = time - Stack[StackPos];\n" +" DurationArray[StackIndex[StackPos]] = Duration;\n" +" DurationArray[j] = Duration;\n" +" }\n" +" else\n" +" {\n" +" DurationArray[j] = 0;\n" +" }\n" +" }\n" +" }\n" +" for(var j = 0; j < StackPos; ++j)\n" +" {\n" +" DurationArray[j] = 0;\n" +" }\n" +" DurationArrays[nLog] = DurationArray;\n" +" }\n" +" ProfileLeave();\n" +" return DurationArrays;\n" +"\n" +"}\n" +"function PreprocessLods()\n" +"{\n" +" ProfileEnter(\"PreprocessLods\");\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var SplitArrays = 
PreprocessBuildSplitArray();\n" +" var DurationArrays = PreprocessBuildDurationArray();\n" +" var Source = LodData[0];\n" +" var SourceLogStart = Source.LogStart;\n" +" var NumFrames = SourceLogStart.length;\n" +"\n" +" for(var i = 0; i < NumLodSplits-1; ++i)\n" +" {\n" +" var DestLogStart = Array(SourceLogStart.length);\n" +" for(var j = 0; j < DestLogStart.length; ++j)\n" +" {\n" +" DestLogStart[j] = Array(nNumLogs);\n" +" }\n" +" var MinDelta = Array(nNumLogs);\n" +" var TimeArray = Array(nNumLogs);\n" +" var IndexArray = Array(nNumLogs);\n" +" var TypeArray = Array(nNumLogs);\n" +"\n" +"\n" +"\n" +" for(nLog = 0; nLog < nNumLogs; ++nLog)\n" +" {\n" +" var SourceTypeArray = Source.TypeArray[nLog];\n" +" var SourceTimeArray = Source.TimeArray[nLog];\n" +" var SourceIndexArray = Source.IndexArray[nLog];\n" +" var Duration = DurationArrays[nLog];\n" +" console.assert(Duration.length == SourceTypeArray.length, \"must be equal!\");\n" +" var SplitTime = SplitArrays[nLog][i];\n" +"\n" +" MinDelta[nLog] = SplitTime;\n" +" if(SplitTime < SPLIT_LIMIT)\n" +" {\n" +" var SourceCount = SourceTypeArray.length;\n" +" var DestTypeArray = Array();\n" +" var DestTimeArray = Array();\n" +" var DestIndexArray = Array();\n" +" var RemapArray = Array(SourceCount);\n" +" var DiscardLast = 0;\n" +"\n" +" for(var j = 0; j < SourceCount; ++j)\n" +" {\n" +" RemapArray[j] = DestTypeArray.length;\n" +" if(Duration[j] >= SplitTime || (SourceTypeArray[j] == 3 && 0 == DiscardLast))\n" +" {\n" +" DiscardLast = 0;\n" +" DestTypeArray.push(SourceTypeArray[j]);\n" +" DestTimeArray.push(SourceTimeArray[j]);\n" +" DestIndexArray.push(SourceIndexArray[j]);\n" +" }\n" +" else\n" +" {\n" +" DiscardLast = 1;\n" +" }\n" +" }\n" +" TimeArray[nLog] = DestTimeArray;\n" +" IndexArray[nLog] = DestIndexArray;\n" +" TypeArray[nLog] = DestTypeArray;\n" +" for(var j = 0; j < NumFrames; ++j)\n" +" {\n" +" var OldStart = SourceLogStart[j][nLog];\n" +" var NewStart = RemapArray[OldStart];\n" +" var FrameArray = 
DestLogStart[j];\n" +" FrameArray[nLog] = NewStart;\n" +" }\n" +" }\n" +" else\n" +" {\n" +"\n" +" for(var j = 0; j < NumFrames; ++j)\n" +" {\n" +" var FrameArray = DestLogStart[j];\n" +" \n" +" FrameArray[nLog] = 0;\n" +" }\n" +"\n" +" }\n" +"\n" +" }\n" +" MakeLod(i+1, MinDelta, TimeArray, TypeArray, IndexArray, DestLogStart);\n" +" }\n" +" ProfileLeave();\n" +"}\n" /* Embedded JS PreprocessGlobalArray(): concatenates the per-frame tt/ts/ti arrays of every log into per-log global arrays, recording each frame's start/end offsets, and registers the result as LOD 0. Entries timestamped more than 33ms past their frame end are discarded; a type-3 entry follows the skip decision of the entry before it, so its type is read at the entry index xx (not the frame index i). */ +"function PreprocessGlobalArray()\n" +"{\n" +" ProfileEnter(\"PreprocessGlobalArray\");\n" +" var nNumLogs = Frames[0].ts.length;\n" +" var CaptureStart = Frames[0].framestart;\n" +" var CaptureEnd = Frames[Frames.length-1].frameend;\n" +" g_TypeArray = new Array(nNumLogs);\n" +" g_TimeArray = new Array(nNumLogs);\n" +" g_IndexArray = new Array(nNumLogs);\n" +" var StackPos = 0;\n" +" var Stack = Array(20);\n" +" var LogStartArray = new Array(Frames.length);\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" Frames[i].LogStart = new Array(nNumLogs); \n" +" LogStartArray[i] = Frames[i].LogStart;\n" +"\n" +" Frames[i].LogEnd = new Array(nNumLogs);\n" +" }\n" +" var MinDelta = Array(nNumLogs);\n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" MinDelta[nLog] = 0;\n" +" var Discard = 0;\n" +" var TypeArray = new Array();\n" +" var TimeArray = new Array();\n" +" var IndexArray = new Array();\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" var Frame_ = Frames[i]; \n" +" Frame_.LogStart[nLog] = TimeArray.length;\n" +" var FrameDiscard = Frame_.frameend + 33;//if timestamps are more than 33ms after current frame, we assume buffer has wrapped.\n" +" var tt = Frame_.tt[nLog];\n" +" var ts = Frame_.ts[nLog];\n" +" var ti = Frame_.ti[nLog];\n" +" var len = tt.length;\n" +" var DiscardLast = 0;\n" +" for(var xx = 0; xx < len; ++xx)\n" +" {\n" +" var Skip = (tt[xx] == 3) ? 
DiscardLast : ts[xx] > FrameDiscard;\n" +" if(Skip)\n" +" {\n" +" Discard++;\n" +" DiscardLast = 1;\n" +" }\n" +" else\n" +" {\n" +" DiscardLast = 0;\n" +" TypeArray.push(tt[xx]);\n" +" TimeArray.push(ts[xx]);\n" +" IndexArray.push(ti[xx]);\n" +" }\n" +" }\n" +" Frame_.LogEnd[nLog] = TimeArray.length;\n" +" }\n" +" g_TypeArray[nLog] = TypeArray;\n" +" g_TimeArray[nLog] = TimeArray;\n" +" g_IndexArray[nLog] = IndexArray;\n" +" if(Discard)\n" +" {\n" +" console.log(\'discarded \' + Discard + \' markers from \' + ThreadNames[nLog]);\n" +" }\n" +" }\n" +" MakeLod(0, MinDelta, g_TimeArray, g_TypeArray, g_IndexArray, LogStartArray);\n" +" ProfileLeave();\n" +"}\n" +"\n" +"function PreprocessFindFirstFrames()\n" +"{\n" +" ProfileEnter(\"PreprocesFindFirstFrames\");\n" +" //create arrays that show how far back we need to start search in order to get all markers.\n" +" var nNumLogs = Frames[0].ts.length;\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" Frames[i].FirstFrameIndex = new Array(nNumLogs);\n" +" }\n" +"\n" +" var StackPos = 0;\n" +" var Stack = Array(20);\n" +" g_MaxStack = Array(nNumLogs);\n" +" \n" +" for(nLog = 0; nLog < nNumLogs; nLog++)\n" +" {\n" +" var MaxStack = 0;\n" +" StackPos = 0;\n" +" for(var i = 0; i < Frames.length; i++)\n" +" {\n" +" var Frame_ = Frames[i]; \n" +" var tt = Frame_.tt[nLog];\n" +" var count = tt.length;\n" +"\n" +" var FirstFrame = i;\n" +" if(StackPos>0)\n" +" {\n" +" FirstFrame = Stack[0];\n" +" }\n" +" Frames[i].FirstFrameIndex[nLog] = FirstFrame;\n" +"\n" +" for(var j = 0; j < count; j++)\n" +" {\n" +" var type = tt[j];\n" +" if(type == 1)\n" +" {\n" +" Stack[StackPos] = i;//store the frame which it comes from\n" +" StackPos++;\n" +" if(StackPos > MaxStack)\n" +" {\n" +" MaxStack = StackPos;\n" +" }\n" +" }\n" +" else if(type == 0)\n" +" {\n" +" if(StackPos>0)\n" +" {\n" +" StackPos--;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" g_MaxStack[nLog] = MaxStack;\n" +" }\n" +" ProfileLeave();\n" +"}\n" +"function 
PreprocessMeta()\n" +"{\n" +" MetaLengths = Array(MetaNames.length);\n" +" MetaLengthsAvg = Array(MetaNames.length);\n" +" MetaLengthsMax = Array(MetaNames.length);\n" +" for(var i = 0; i < MetaNames.length; ++i)\n" +" {\n" +" MetaLengths[i] = MetaNames[i].length+1;\n" +" MetaLengthsAvg[i] = MetaNames[i].length+5;\n" +" MetaLengthsMax[i] = MetaNames[i].length+5;\n" +" if(MetaLengths[i]<12)\n" +" MetaLengths[i] = 12;\n" +" if(MetaLengthsAvg[i]<12)\n" +" MetaLengthsAvg[i] = 12;\n" +" if(MetaLengthsMax[i]<12)\n" +" MetaLengthsMax[i] = 12;\n" +" }\n" +" for(var i = 0; i < TimerInfo.length; ++i)\n" +" {\n" +" var Timer = TimerInfo[i];\n" +" for(var j = 0; j < MetaNames.length; ++j)\n" +" {\n" +" var Len = FormatMeta(Timer.meta[j],0).length + 2;\n" +" var LenAvg = FormatMeta(Timer.meta[j],2).length + 2;\n" +" var LenMax = FormatMeta(Timer.meta[j],0).length + 2;\n" +" if(Len > MetaLengths[j])\n" +" {\n" +" MetaLengths[j] = Len;\n" +" }\n" +" if(LenAvg > MetaLengthsAvg[j])\n" +" {\n" +" MetaLengthsAvg[j] = LenAvg;\n" +" }\n" +" if(LenMax > MetaLengthsMax[j])\n" +" {\n" +" MetaLengthsMax[j] = LenMax;\n" +" }\n" +" }\n" +" }\n" +"}\n" +"\n" +"function Preprocess()\n" +"{\n" +" var ProfileModeOld = ProfileMode;\n" +" ProfileMode = 1;\n" +" ProfileModeClear();\n" +" ProfileEnter(\"Preprocess\");\n" +" PreprocessCalculateAllTimers();\n" +" PreprocessFindFirstFrames();\n" +" PreprocessGlobalArray();\n" +" PreprocessLods();\n" +" PreprocessMeta();\n" +" PreprocessContextSwitchCache();\n" +" ProfileLeave();\n" +" ProfileModeDump();\n" +" ProfileMode = ProfileModeOld;\n" +" Initialized = 1;\n" +"}\n" +"\n" +"InitGroups();\n" +"ReadCookie();\n" +"MeasureFont()\n" +"InitThreadMenu();\n" +"InitGroupMenu();\n" +"InitFrameInfo();\n" +"UpdateThreadMenu();\n" +"ResizeCanvas();\n" +"Preprocess();\n" +"OnPageReady();\n" +"Draw(1);\n" +"AutoRedraw();\n" +"\n" +"</script>\n" +"</body>\n" +"</html> "; + +const size_t g_MicroProfileHtml_end_2_size = sizeof(g_MicroProfileHtml_end_2); +const 
char* g_MicroProfileHtml_end[] = { +&g_MicroProfileHtml_end_0[0], +&g_MicroProfileHtml_end_1[0], +&g_MicroProfileHtml_end_2[0], +}; +size_t g_MicroProfileHtml_end_sizes[] = { +sizeof(g_MicroProfileHtml_end_0), +sizeof(g_MicroProfileHtml_end_1), +sizeof(g_MicroProfileHtml_end_2), +}; +size_t g_MicroProfileHtml_end_count = 3; +#endif //MICROPROFILE_EMBED_HTML + +///end file generated from microprofile.html diff --git a/externals/microprofile/microprofileui.h b/externals/microprofile/microprofileui.h new file mode 100644 index 000000000..eac1119a4 --- /dev/null +++ b/externals/microprofile/microprofileui.h @@ -0,0 +1,3348 @@ +#pragma once +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. +// In jurisdictions that recognize copyright laws, the author or authors +// of this software dedicate any and all copyright interest in the +// software to the public domain. We make this dedication for the benefit +// of the public at large and to the detriment of our heirs and +// successors. We intend this dedication to be an overt act of +// relinquishment in perpetuity of all present and future rights to this +// software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+// For more information, please refer to <http://unlicense.org/> +// +// *********************************************************************** +// +// +// + + +#ifndef MICROPROFILE_ENABLED +#error "microprofile.h must be included before including microprofileui.h" +#endif + +#ifndef MICROPROFILEUI_ENABLED +#define MICROPROFILEUI_ENABLED MICROPROFILE_ENABLED +#endif + +#ifndef MICROPROFILEUI_API +#define MICROPROFILEUI_API +#endif + + +#if 0 == MICROPROFILEUI_ENABLED +#define MicroProfileMouseButton(foo, bar) do{}while(0) +#define MicroProfileMousePosition(foo, bar, z) do{}while(0) +#define MicroProfileModKey(key) do{}while(0) +#define MicroProfileDraw(foo, bar) do{}while(0) +#define MicroProfileIsDrawing() 0 +#define MicroProfileToggleDisplayMode() do{}while(0) +#define MicroProfileSetDisplayMode(f) do{}while(0) +#else + +#ifndef MICROPROFILE_DRAWCURSOR +#define MICROPROFILE_DRAWCURSOR 0 +#endif + +#ifndef MICROPROFILE_DETAILED_BAR_NAMES +#define MICROPROFILE_DETAILED_BAR_NAMES 1 +#endif + +#ifndef MICROPROFILE_TEXT_WIDTH +#define MICROPROFILE_TEXT_WIDTH 5 +#endif + +#ifndef MICROPROFILE_TEXT_HEIGHT +#define MICROPROFILE_TEXT_HEIGHT 8 +#endif + +#ifndef MICROPROFILE_DETAILED_BAR_HEIGHT +#define MICROPROFILE_DETAILED_BAR_HEIGHT 12 +#endif + +#ifndef MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT +#define MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT 7 +#endif + +#ifndef MICROPROFILE_GRAPH_WIDTH +#define MICROPROFILE_GRAPH_WIDTH 256 +#endif + +#ifndef MICROPROFILE_GRAPH_HEIGHT +#define MICROPROFILE_GRAPH_HEIGHT 256 +#endif + +#ifndef MICROPROFILE_BORDER_SIZE +#define MICROPROFILE_BORDER_SIZE 1 +#endif + +#ifndef MICROPROFILE_HELP_LEFT +#define MICROPROFILE_HELP_LEFT "Left-Click" +#endif + +#ifndef MICROPROFILE_HELP_ALT +#define MICROPROFILE_HELP_ALT "Alt-Click" +#endif + +#ifndef MICROPROFILE_HELP_MOD +#define MICROPROFILE_HELP_MOD "Mod" +#endif + +#ifndef MICROPROFILE_BAR_WIDTH +#define MICROPROFILE_BAR_WIDTH 100 +#endif + +#ifndef MICROPROFILE_CUSTOM_MAX +#define 
MICROPROFILE_CUSTOM_MAX 8 +#endif + +#ifndef MICROPROFILE_CUSTOM_MAX_TIMERS +#define MICROPROFILE_CUSTOM_MAX_TIMERS 64 +#endif + +#ifndef MICROPROFILE_CUSTOM_PADDING +#define MICROPROFILE_CUSTOM_PADDING 12 +#endif + + +#define MICROPROFILE_FRAME_HISTORY_HEIGHT 50 +#define MICROPROFILE_FRAME_HISTORY_WIDTH 7 +#define MICROPROFILE_FRAME_HISTORY_COLOR_CPU 0xffff7f27 //255 127 39 +#define MICROPROFILE_FRAME_HISTORY_COLOR_GPU 0xff37a0ee //55 160 238 +#define MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT 0x7733bb44 +#define MICROPROFILE_FRAME_COLOR_HIGHTLIGHT 0x20009900 +#define MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU 0x20996600 +#define MICROPROFILE_NUM_FRAMES (MICROPROFILE_MAX_FRAME_HISTORY - (MICROPROFILE_GPU_FRAME_DELAY+1)) + +#define MICROPROFILE_TOOLTIP_MAX_STRINGS (32 + MICROPROFILE_MAX_GROUPS*2) +#define MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE (4*1024) +#define MICROPROFILE_TOOLTIP_MAX_LOCKED 3 + + +enum +{ + MICROPROFILE_CUSTOM_BARS = 0x1, + MICROPROFILE_CUSTOM_BAR_SOURCE_MAX = 0x2, + MICROPROFILE_CUSTOM_BAR_SOURCE_AVG = 0, + MICROPROFILE_CUSTOM_STACK = 0x4, + MICROPROFILE_CUSTOM_STACK_SOURCE_MAX = 0x8, + MICROPROFILE_CUSTOM_STACK_SOURCE_AVG = 0, +}; + + +MICROPROFILEUI_API void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight); //! 
call if drawing microprofilers +MICROPROFILEUI_API bool MicroProfileIsDrawing(); +MICROPROFILEUI_API void MicroProfileToggleGraph(MicroProfileToken nToken); +MICROPROFILEUI_API bool MicroProfileDrawGraph(uint32_t nScreenWidth, uint32_t nScreenHeight); +MICROPROFILEUI_API void MicroProfileToggleDisplayMode(); //switch between off, bars, detailed +MICROPROFILEUI_API void MicroProfileSetDisplayMode(int); //switch between off, bars, detailed +MICROPROFILEUI_API void MicroProfileClearGraph(); +MICROPROFILEUI_API void MicroProfileMousePosition(uint32_t nX, uint32_t nY, int nWheelDelta); +MICROPROFILEUI_API void MicroProfileModKey(uint32_t nKeyState); +MICROPROFILEUI_API void MicroProfileMouseButton(uint32_t nLeft, uint32_t nRight); +MICROPROFILEUI_API void MicroProfileDrawLineVertical(int nX, int nTop, int nBottom, uint32_t nColor); +MICROPROFILEUI_API void MicroProfileDrawLineHorizontal(int nLeft, int nRight, int nY, uint32_t nColor); +MICROPROFILEUI_API void MicroProfileLoadPreset(const char* pSuffix); +MICROPROFILEUI_API void MicroProfileSavePreset(const char* pSuffix); + +MICROPROFILEUI_API void MicroProfileDrawText(int nX, int nY, uint32_t nColor, const char* pText, uint32_t nNumCharacters); +MICROPROFILEUI_API void MicroProfileDrawBox(int nX, int nY, int nX1, int nY1, uint32_t nColor, MicroProfileBoxType = MicroProfileBoxTypeFlat); +MICROPROFILEUI_API void MicroProfileDrawLine2D(uint32_t nVertices, float* pVertices, uint32_t nColor); +MICROPROFILEUI_API void MicroProfileDumpTimers(); + +MICROPROFILEUI_API void MicroProfileInitUI(); + +MICROPROFILEUI_API void MicroProfileCustomGroupToggle(const char* pCustomName); +MICROPROFILEUI_API void MicroProfileCustomGroupEnable(const char* pCustomName); +MICROPROFILEUI_API void MicroProfileCustomGroupEnable(uint32_t nIndex); +MICROPROFILEUI_API void MicroProfileCustomGroupDisable(); +MICROPROFILEUI_API void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint32_t nAggregateFlip, float fReferenceTime, 
uint32_t nFlags); +MICROPROFILEUI_API void MicroProfileCustomGroupAddTimer(const char* pCustomName, const char* pGroup, const char* pTimer); + +#ifdef MICROPROFILEUI_IMPL +#ifdef _WIN32 +#define snprintf _snprintf +#endif +#include <stdlib.h> +#include <stdarg.h> +#include <math.h> +#include <algorithm> + +MICROPROFILE_DEFINE(g_MicroProfileDetailed, "MicroProfile", "Detailed View", 0x8888000); +MICROPROFILE_DEFINE(g_MicroProfileDrawGraph, "MicroProfile", "Draw Graph", 0xff44ee00); +MICROPROFILE_DEFINE(g_MicroProfileDrawBarView, "MicroProfile", "DrawBarView", 0x00dd77); +MICROPROFILE_DEFINE(g_MicroProfileDraw,"MicroProfile", "Draw", 0x737373); + + +struct MicroProfileStringArray +{ + const char* ppStrings[MICROPROFILE_TOOLTIP_MAX_STRINGS]; + char Buffer[MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE]; + char* pBufferPos; + uint32_t nNumStrings; +}; + +struct MicroProfileGroupMenuItem +{ + uint32_t nIsCategory; + uint32_t nCategoryIndex; + uint32_t nIndex; + const char* pName; +}; + +struct MicroProfileCustom +{ + char pName[MICROPROFILE_NAME_MAX_LEN]; + uint32_t nFlags; + uint32_t nAggregateFlip; + uint32_t nNumTimers; + uint32_t nMaxTimers; + uint64_t nGroupMask; + float fReference; + uint64_t* pTimers; +}; + +struct SOptionDesc +{ + SOptionDesc(){} + SOptionDesc(uint8_t nSubType, uint8_t nIndex, const char* fmt, ...):nSubType(nSubType), nIndex(nIndex) + { + va_list args; + va_start (args, fmt); + vsprintf(Text, fmt, args); + va_end(args); + } + char Text[32]; + uint8_t nSubType; + uint8_t nIndex; + bool bSelected; +}; +static uint32_t g_MicroProfileAggregatePresets[] = {0, 10, 20, 30, 60, 120}; +static float g_MicroProfileReferenceTimePresets[] = {5.f, 10.f, 15.f,20.f, 33.33f, 66.66f, 100.f, 250.f, 500.f, 1000.f}; +static uint32_t g_MicroProfileOpacityPresets[] = {0x40, 0x80, 0xc0, 0xff}; +static const char* g_MicroProfilePresetNames[] = +{ + MICROPROFILE_DEFAULT_PRESET, + "Render", + "GPU", + "Lighting", + "AI", + "Visibility", + "Sound", +}; + +enum +{ + 
MICROPROFILE_NUM_REFERENCE_PRESETS = sizeof(g_MicroProfileReferenceTimePresets)/sizeof(g_MicroProfileReferenceTimePresets[0]), + MICROPROFILE_NUM_OPACITY_PRESETS = sizeof(g_MicroProfileOpacityPresets)/sizeof(g_MicroProfileOpacityPresets[0]), +#if MICROPROFILE_CONTEXT_SWITCH_TRACE + MICROPROFILE_OPTION_SIZE = MICROPROFILE_NUM_REFERENCE_PRESETS + MICROPROFILE_NUM_OPACITY_PRESETS * 2 + 2 + 7, +#else + MICROPROFILE_OPTION_SIZE = MICROPROFILE_NUM_REFERENCE_PRESETS + MICROPROFILE_NUM_OPACITY_PRESETS * 2 + 2 + 3, +#endif +}; + +struct MicroProfileUI +{ + //menu/mouse over stuff + uint64_t nHoverToken; + int64_t nHoverTime; + int nHoverFrame; +#if MICROPROFILE_DEBUG + uint64_t nHoverAddressEnter; + uint64_t nHoverAddressLeave; +#endif + + uint32_t nWidth; + uint32_t nHeight; + + + int nOffsetX; + int nOffsetY; + float fDetailedOffset; //display offset relative to start of latest displayable frame. + float fDetailedRange; //no. of ms to display + float fDetailedOffsetTarget; + float fDetailedRangeTarget; + uint32_t nOpacityBackground; + uint32_t nOpacityForeground; + bool bShowSpikes; + + + + uint32_t nMouseX; + uint32_t nMouseY; + uint32_t nMouseDownX; + uint32_t nMouseDownY; + int nMouseWheelDelta; + uint32_t nMouseDownLeft; + uint32_t nMouseDownRight; + uint32_t nMouseLeft; + uint32_t nMouseRight; + uint32_t nMouseLeftMod; + uint32_t nMouseRightMod; + uint32_t nModDown; + uint32_t nActiveMenu; + + MicroProfileLogEntry* pDisplayMouseOver; + + int64_t nRangeBegin; + int64_t nRangeEnd; + int64_t nRangeBeginGpu; + int64_t nRangeEndGpu; + uint32_t nRangeBeginIndex; + uint32_t nRangeEndIndex; + MicroProfileThreadLog* pRangeLog; + uint32_t nHoverColor; + uint32_t nHoverColorShared; + + MicroProfileStringArray LockedToolTips[MICROPROFILE_TOOLTIP_MAX_LOCKED]; + uint32_t nLockedToolTipColor[MICROPROFILE_TOOLTIP_MAX_LOCKED]; + int LockedToolTipFront; + + MicroProfileGroupMenuItem GroupMenu[MICROPROFILE_MAX_GROUPS + MICROPROFILE_MAX_CATEGORIES]; + uint32_t GroupMenuCount; + + + 
uint32_t nCustomActive; + uint32_t nCustomTimerCount; + uint32_t nCustomCount; + MicroProfileCustom Custom[MICROPROFILE_CUSTOM_MAX]; + uint64_t CustomTimer[MICROPROFILE_CUSTOM_MAX_TIMERS]; + + SOptionDesc Options[MICROPROFILE_OPTION_SIZE]; + + +}; + +MicroProfileUI g_MicroProfileUI; +#define UI g_MicroProfileUI +static uint32_t g_nMicroProfileBackColors[2] = { 0x474747, 0x313131 }; +#define MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS 16 +static uint32_t g_nMicroProfileContextSwitchThreadColors[MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS] = //palette generated by http://tools.medialab.sciences-po.fr/iwanthue/index.php +{ + 0x63607B, + 0x755E2B, + 0x326A55, + 0x523135, + 0x904F42, + 0x87536B, + 0x346875, + 0x5E6046, + 0x35404C, + 0x224038, + 0x413D1E, + 0x5E3A26, + 0x5D6161, + 0x4C6234, + 0x7D564F, + 0x5C4352, +}; + + +void MicroProfileInitUI() +{ + static bool bInitialized = false; + if(!bInitialized) + { + bInitialized = true; + memset(&g_MicroProfileUI, 0, sizeof(g_MicroProfileUI)); + UI.nActiveMenu = (uint32_t)-1; + UI.fDetailedOffsetTarget = UI.fDetailedOffset = 0.f; + UI.fDetailedRangeTarget = UI.fDetailedRange = 50.f; + + UI.nOpacityBackground = 0xff<<24; + UI.nOpacityForeground = 0xff<<24; + + UI.bShowSpikes = false; + + UI.nWidth = 100; + UI.nHeight = 100; + + UI.nCustomActive = (uint32_t)-1; + UI.nCustomTimerCount = 0; + UI.nCustomCount = 0; + + int nIndex = 0; + UI.Options[nIndex++] = SOptionDesc(0xff, 0, "%s", "Reference"); + for(int i = 0; i < MICROPROFILE_NUM_REFERENCE_PRESETS; ++i) + { + UI.Options[nIndex++] = SOptionDesc(0, i, " %6.2fms", g_MicroProfileReferenceTimePresets[i]); + } + UI.Options[nIndex++] = SOptionDesc(0xff, 0, "%s", "BG Opacity"); + for(int i = 0; i < MICROPROFILE_NUM_OPACITY_PRESETS; ++i) + { + UI.Options[nIndex++] = SOptionDesc(1, i, " %7d%%", (i+1)*25); + } + UI.Options[nIndex++] = SOptionDesc(0xff, 0, "%s", "FG Opacity"); + for(int i = 0; i < MICROPROFILE_NUM_OPACITY_PRESETS; ++i) + { + UI.Options[nIndex++] = SOptionDesc(2, i, " %7d%%", 
(i+1)*25); + } + UI.Options[nIndex++] = SOptionDesc(0xff, 0, "%s", "Spike Display"); + UI.Options[nIndex++] = SOptionDesc(3, 0, "%s", " Enable"); + +#if MICROPROFILE_CONTEXT_SWITCH_TRACE + UI.Options[nIndex++] = SOptionDesc(0xff, 0, "%s", "CSwitch Trace"); + UI.Options[nIndex++] = SOptionDesc(4, 0, "%s", " Enable"); + UI.Options[nIndex++] = SOptionDesc(4, 1, "%s", " All Threads"); + UI.Options[nIndex++] = SOptionDesc(4, 2, "%s", " No Bars"); +#endif + MP_ASSERT(nIndex == MICROPROFILE_OPTION_SIZE); + } +} + +void MicroProfileSetDisplayMode(int nValue) +{ + MicroProfile& S = *MicroProfileGet(); + nValue = nValue >= 0 && nValue < 4 ? nValue : S.nDisplay; + S.nDisplay = nValue; + UI.nOffsetY = 0; +} + +void MicroProfileToggleDisplayMode() +{ + MicroProfile& S = *MicroProfileGet(); + S.nDisplay = (S.nDisplay + 1) % 4; + UI.nOffsetY = 0; +} + + +void MicroProfileStringArrayClear(MicroProfileStringArray* pArray) +{ + pArray->nNumStrings = 0; + pArray->pBufferPos = &pArray->Buffer[0]; +} + +void MicroProfileStringArrayAddLiteral(MicroProfileStringArray* pArray, const char* pLiteral) +{ + MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS); + pArray->ppStrings[pArray->nNumStrings++] = pLiteral; +} + +void MicroProfileStringArrayFormat(MicroProfileStringArray* pArray, const char* fmt, ...) 
+{ + MP_ASSERT(pArray->nNumStrings < MICROPROFILE_TOOLTIP_MAX_STRINGS); + pArray->ppStrings[pArray->nNumStrings++] = pArray->pBufferPos; + va_list args; + va_start (args, fmt); + pArray->pBufferPos += 1 + vsprintf(pArray->pBufferPos, fmt, args); + va_end(args); + MP_ASSERT(pArray->pBufferPos < pArray->Buffer + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE); +} +void MicroProfileStringArrayCopy(MicroProfileStringArray* pDest, MicroProfileStringArray* pSrc) +{ + memcpy(&pDest->ppStrings[0], &pSrc->ppStrings[0], sizeof(pDest->ppStrings)); + memcpy(&pDest->Buffer[0], &pSrc->Buffer[0], sizeof(pDest->Buffer)); + for(uint32_t i = 0; i < MICROPROFILE_TOOLTIP_MAX_STRINGS; ++i) + { + if(i < pSrc->nNumStrings) + { + if(pSrc->ppStrings[i] >= &pSrc->Buffer[0] && pSrc->ppStrings[i] < &pSrc->Buffer[0] + MICROPROFILE_TOOLTIP_STRING_BUFFER_SIZE) + { + pDest->ppStrings[i] += &pDest->Buffer[0] - &pSrc->Buffer[0]; + } + } + } + pDest->nNumStrings = pSrc->nNumStrings; +} + +void MicroProfileFloatWindowSize(const char** ppStrings, uint32_t nNumStrings, uint32_t* pColors, uint32_t& nWidth, uint32_t& nHeight, uint32_t* pStringLengths = 0) +{ + uint32_t* nStringLengths = pStringLengths ? 
pStringLengths : (uint32_t*)alloca(nNumStrings * sizeof(uint32_t)); + uint32_t nTextCount = nNumStrings/2; + for(uint32_t i = 0; i < nTextCount; ++i) + { + uint32_t i0 = i * 2; + uint32_t s0, s1; + nStringLengths[i0] = s0 = (uint32_t)strlen(ppStrings[i0]); + nStringLengths[i0+1] = s1 = (uint32_t)strlen(ppStrings[i0+1]); + nWidth = MicroProfileMax(s0+s1, nWidth); + } + nWidth = (MICROPROFILE_TEXT_WIDTH+1) * (2+nWidth) + 2 * MICROPROFILE_BORDER_SIZE; + if(pColors) + nWidth += MICROPROFILE_TEXT_WIDTH + 1; + nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nTextCount + 2 * MICROPROFILE_BORDER_SIZE; +} + +void MicroProfileDrawFloatWindow(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, uint32_t* pColors = 0) +{ + uint32_t nWidth = 0, nHeight = 0; + uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t)); + MicroProfileFloatWindowSize(ppStrings, nNumStrings, pColors, nWidth, nHeight, nStringLengths); + uint32_t nTextCount = nNumStrings/2; + if(nX + nWidth > UI.nWidth) + nX = UI.nWidth - nWidth; + if(nY + nHeight > UI.nHeight) + nY = UI.nHeight - nHeight; + MicroProfileDrawBox(nX-1, nY-1, nX + nWidth+1, nY + nHeight+1, 0xff000000|nColor); + MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000); + if(pColors) + { + nX += MICROPROFILE_TEXT_WIDTH+1; + nWidth -= MICROPROFILE_TEXT_WIDTH+1; + } + for(uint32_t i = 0; i < nTextCount; ++i) + { + int i0 = i * 2; + if(pColors) + { + MicroProfileDrawBox(nX-MICROPROFILE_TEXT_WIDTH, nY, nX, nY + MICROPROFILE_TEXT_WIDTH, pColors[i]|0xff000000); + } + MicroProfileDrawText(nX + 1, nY + 1, (uint32_t)-1, ppStrings[i0], (uint32_t)strlen(ppStrings[i0])); + MicroProfileDrawText(nX + nWidth - nStringLengths[i0+1] * (MICROPROFILE_TEXT_WIDTH+1), nY + 1, (uint32_t)-1, ppStrings[i0+1], (uint32_t)strlen(ppStrings[i0+1])); + nY += (MICROPROFILE_TEXT_HEIGHT+1); + } +} +void MicroProfileDrawTextBox(uint32_t nX, uint32_t nY, const char** ppStrings, uint32_t nNumStrings, uint32_t nColor, 
uint32_t* pColors = 0) +{ + uint32_t nWidth = 0, nHeight = 0; + uint32_t* nStringLengths = (uint32_t*)alloca(nNumStrings * sizeof(uint32_t)); + for(uint32_t i = 0; i < nNumStrings; ++i) + { + nStringLengths[i] = (uint32_t)strlen(ppStrings[i]); + nWidth = MicroProfileMax(nWidth, nStringLengths[i]); + nHeight++; + } + nWidth = (MICROPROFILE_TEXT_WIDTH+1) * (2+nWidth) + 2 * MICROPROFILE_BORDER_SIZE; + nHeight = (MICROPROFILE_TEXT_HEIGHT+1) * nHeight + 2 * MICROPROFILE_BORDER_SIZE; + if(nX + nWidth > UI.nWidth) + nX = UI.nWidth - nWidth; + if(nY + nHeight > UI.nHeight) + nY = UI.nHeight - nHeight; + MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000); + for(uint32_t i = 0; i < nNumStrings; ++i) + { + MicroProfileDrawText(nX + 1, nY + 1, (uint32_t)-1, ppStrings[i], (uint32_t)strlen(ppStrings[i])); + nY += (MICROPROFILE_TEXT_HEIGHT+1); + } +} + + + +void MicroProfileToolTipMeta(MicroProfileStringArray* pToolTip) +{ + MicroProfile& S = *MicroProfileGet(); + if(UI.nRangeBeginIndex != UI.nRangeEndIndex && UI.pRangeLog) + { + uint64_t nMetaSum[MICROPROFILE_META_MAX] = {0}; + uint64_t nMetaSumInclusive[MICROPROFILE_META_MAX] = {0}; + int nStackDepth = 0; + uint32_t nRange[2][2]; + MicroProfileThreadLog* pLog = UI.pRangeLog; + + + MicroProfileGetRange(UI.nRangeEndIndex, UI.nRangeBeginIndex, nRange); + for(uint32_t i = 0; i < 2; ++i) + { + uint32_t nStart = nRange[i][0]; + uint32_t nEnd = nRange[i][1]; + for(uint32_t j = nStart; j < nEnd; ++j) + { + MicroProfileLogEntry LE = pLog->Log[j]; + int nType = MicroProfileLogType(LE); + switch(nType) + { + case MP_LOG_META: + { + int64_t nMetaIndex = MicroProfileLogTimerIndex(LE); + int64_t nMetaCount = MicroProfileLogGetTick(LE); + MP_ASSERT(nMetaIndex < MICROPROFILE_META_MAX); + if(nStackDepth>1) + { + nMetaSumInclusive[nMetaIndex] += nMetaCount; + } + else + { + nMetaSum[nMetaIndex] += nMetaCount; + } + } + break; + case MP_LOG_LEAVE: + if(nStackDepth) + { + nStackDepth--; + } + else + { + for(int i = 0; i < 
MICROPROFILE_META_MAX; ++i) + { + nMetaSumInclusive[i] += nMetaSum[i]; + nMetaSum[i] = 0; + } + } + break; + case MP_LOG_ENTER: + nStackDepth++; + break; + } + + } + } + bool bSpaced = false; + for(int i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(S.MetaCounters[i].pName && (nMetaSum[i]||nMetaSumInclusive[i])) + { + if(!bSpaced) + { + bSpaced = true; + MicroProfileStringArrayAddLiteral(pToolTip, ""); + MicroProfileStringArrayAddLiteral(pToolTip, ""); + } + MicroProfileStringArrayFormat(pToolTip, "%s excl", S.MetaCounters[i].pName); + MicroProfileStringArrayFormat(pToolTip, "%5d", nMetaSum[i]); + MicroProfileStringArrayFormat(pToolTip, "%s incl", S.MetaCounters[i].pName); + MicroProfileStringArrayFormat(pToolTip, "%5d", nMetaSum[i] + nMetaSumInclusive[i]); + } + } + } +} + +void MicroProfileDrawFloatTooltip(uint32_t nX, uint32_t nY, uint32_t nToken, uint64_t nTime) +{ + MicroProfile& S = *MicroProfileGet(); + + uint32_t nIndex = MicroProfileGetTimerIndex(nToken); + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + uint32_t nAggregateCount = S.Aggregate[nIndex].nCount ? S.Aggregate[nIndex].nCount : 1; + + uint32_t nGroupId = MicroProfileGetGroupIndex(nToken); + uint32_t nTimerId = MicroProfileGetTimerIndex(nToken); + bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu; + + float fToMs = MicroProfileTickToMsMultiplier(bGpu ? 
MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + + float fMs = fToMs * (nTime); + float fFrameMs = fToMs * (S.Frame[nIndex].nTicks); + float fAverage = fToMs * (S.Aggregate[nIndex].nTicks/nAggregateFrames); + float fCallAverage = fToMs * (S.Aggregate[nIndex].nTicks / nAggregateCount); + float fMax = fToMs * (S.AggregateMax[nIndex]); + + float fFrameMsExclusive = fToMs * (S.FrameExclusive[nIndex]); + float fAverageExclusive = fToMs * (S.AggregateExclusive[nIndex]/nAggregateFrames); + float fMaxExclusive = fToMs * (S.AggregateMaxExclusive[nIndex]); + + float fGroupAverage = fToMs * (S.AggregateGroup[nGroupId] / nAggregateFrames); + float fGroupMax = fToMs * (S.AggregateGroupMax[nGroupId]); + float fGroup = fToMs * (S.FrameGroup[nGroupId]); + + + MicroProfileStringArray ToolTip; + MicroProfileStringArrayClear(&ToolTip); + const char* pGroupName = S.GroupInfo[nGroupId].pName; + const char* pTimerName = S.TimerInfo[nTimerId].pName; + MicroProfileStringArrayAddLiteral(&ToolTip, "Timer:"); + MicroProfileStringArrayFormat(&ToolTip, "%s", pTimerName); + +#if MICROPROFILE_DEBUG + MicroProfileStringArrayFormat(&ToolTip,"0x%p", UI.nHoverAddressEnter); + MicroProfileStringArrayFormat(&ToolTip,"0x%p", UI.nHoverAddressLeave); +#endif + + if(nTime != (uint64_t)0) + { + MicroProfileStringArrayAddLiteral(&ToolTip, "Time:"); + MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fMs); + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + } + + MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Time:"); + MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fFrameMs); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Average:"); + MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fAverage); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Max:"); + MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fMax); + + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + + 
MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Call Average:"); + MicroProfileStringArrayFormat(&ToolTip,"%6.3fms", fCallAverage); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Call Count:"); + MicroProfileStringArrayFormat(&ToolTip, "%6d", nAggregateCount / nAggregateFrames); + + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Frame Time:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fFrameMsExclusive); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Average:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fAverageExclusive); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Exclusive Max:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3fms", fMaxExclusive); + + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + + MicroProfileStringArrayAddLiteral(&ToolTip, "Group:"); + MicroProfileStringArrayFormat(&ToolTip, "%s", pGroupName); + MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Time:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroup); + MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Average:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroupAverage); + MicroProfileStringArrayAddLiteral(&ToolTip, "Frame Max:"); + MicroProfileStringArrayFormat(&ToolTip, "%6.3f", fGroupMax); + + + + + MicroProfileToolTipMeta(&ToolTip); + + + MicroProfileDrawFloatWindow(nX, nY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, S.TimerInfo[nTimerId].nColor); + + if(UI.nMouseLeftMod) + { + int nIndex = (g_MicroProfileUI.LockedToolTipFront + MICROPROFILE_TOOLTIP_MAX_LOCKED - 1) % MICROPROFILE_TOOLTIP_MAX_LOCKED; + g_MicroProfileUI.nLockedToolTipColor[nIndex] = S.TimerInfo[nTimerId].nColor; + MicroProfileStringArrayCopy(&g_MicroProfileUI.LockedToolTips[nIndex], &ToolTip); + g_MicroProfileUI.LockedToolTipFront = nIndex; + + } +} + + +void MicroProfileZoomTo(int64_t 
nTickStart, int64_t nTickEnd) +{ + MicroProfile& S = *MicroProfileGet(); + + int64_t nStart = S.Frames[S.nFrameCurrent].nFrameStartCpu; + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + UI.fDetailedOffsetTarget = MicroProfileLogTickDifference(nStart, nTickStart) * fToMs; + UI.fDetailedRangeTarget = MicroProfileLogTickDifference(nTickStart, nTickEnd) * fToMs; +} + +void MicroProfileCenter(int64_t nTickCenter) +{ + MicroProfile& S = *MicroProfileGet(); + int64_t nStart = S.Frames[S.nFrameCurrent].nFrameStartCpu; + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fCenter = MicroProfileLogTickDifference(nStart, nTickCenter) * fToMs; + UI.fDetailedOffsetTarget = UI.fDetailedOffset = fCenter - 0.5f * UI.fDetailedRange; +} +#ifdef MICROPROFILE_DEBUG +uint64_t* g_pMicroProfileDumpStart = 0; +uint64_t* g_pMicroProfileDumpEnd = 0; +void MicroProfileDebugDumpRange() +{ + MicroProfile& S = *MicroProfileGet(); + if(g_pMicroProfileDumpStart != g_pMicroProfileDumpEnd) + { + uint64_t* pStart = g_pMicroProfileDumpStart; + uint64_t* pEnd = g_pMicroProfileDumpEnd; + while(pStart != pEnd) + { + uint64_t nTick = MicroProfileLogGetTick(*pStart); + uint64_t nToken = MicroProfileLogTimerIndex(*pStart); + uint32_t nTimerId = MicroProfileGetTimerIndex(nToken); + + const char* pTimerName = S.TimerInfo[nTimerId].pName; + char buffer[256]; + int type = MicroProfileLogType(*pStart); + + const char* pBegin = type == MP_LOG_LEAVE ? "END" : + (type == MP_LOG_ENTER ? 
"BEGIN" : "META"); + snprintf(buffer, 255, "DUMP 0x%p: %s :: %llx: %s\n", pStart, pBegin, nTick, pTimerName); +#ifdef _WIN32 + OutputDebugString(buffer); +#else + printf("%s", buffer); +#endif + pStart++; + } + + g_pMicroProfileDumpStart = g_pMicroProfileDumpEnd; + } +} +#define MP_DEBUG_DUMP_RANGE() MicroProfileDebugDumpRange(); +#else +#define MP_DEBUG_DUMP_RANGE() do{} while(0) +#endif + +#define MICROPROFILE_HOVER_DIST 0.5f + +void MicroProfileDrawDetailedContextSwitchBars(uint32_t nY, uint32_t nThreadId, uint32_t nContextSwitchStart, uint32_t nContextSwitchEnd, int64_t nBaseTicks, uint32_t nBaseY) +{ + MicroProfile& S = *MicroProfileGet(); + int64_t nTickIn = -1; + uint32_t nThreadBefore = -1; + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fMsToScreen = UI.nWidth / UI.fDetailedRange; + float fMouseX = (float)UI.nMouseX; + float fMouseY = (float)UI.nMouseY; + + + for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MP_ASSERT(j < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE); + MicroProfileContextSwitch CS = S.ContextSwitch[j]; + + if(nTickIn == -1) + { + if(CS.nThreadIn == nThreadId) + { + nTickIn = CS.nTicks; + nThreadBefore = CS.nThreadOut; + } + } + else + { + if(CS.nThreadOut == nThreadId) + { + int64_t nTickOut = CS.nTicks; + float fMsStart = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickIn); + float fMsEnd = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickOut); + if(fMsStart <= fMsEnd) + { + float fXStart = fMsStart * fMsToScreen; + float fXEnd = fMsEnd * fMsToScreen; + float fYStart = (float)nY; + float fYEnd = fYStart + (MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT); + uint32_t nColor = g_nMicroProfileContextSwitchThreadColors[CS.nCpu%MICROPROFILE_NUM_CONTEXT_SWITCH_COLORS]; + float fXDist = MicroProfileMax(fXStart - fMouseX, fMouseX - fXEnd); + bool bHover = fXDist < MICROPROFILE_HOVER_DIST && fYStart <= fMouseY && fMouseY <= fYEnd && 
nBaseY < fMouseY; + if(bHover) + { + UI.nRangeBegin = nTickIn; + UI.nRangeEnd = nTickOut; + S.nContextSwitchHoverTickIn = nTickIn; + S.nContextSwitchHoverTickOut = nTickOut; + S.nContextSwitchHoverThread = CS.nThreadOut; + S.nContextSwitchHoverThreadBefore = nThreadBefore; + S.nContextSwitchHoverThreadAfter = CS.nThreadIn; + S.nContextSwitchHoverCpuNext = CS.nCpu; + nColor = UI.nHoverColor; + } + if(CS.nCpu == S.nContextSwitchHoverCpu) + { + nColor = UI.nHoverColorShared; + } + MicroProfileDrawBox(fXStart, fYStart, fXEnd, fYEnd, nColor|UI.nOpacityForeground, MicroProfileBoxTypeFlat); + } + nTickIn = -1; + } + } + } +} + +void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY, int nSelectedFrame) +{ + MicroProfile& S = *MicroProfileGet(); + MP_DEBUG_DUMP_RANGE(); + int nY = nBaseY - UI.nOffsetY; + int64_t nNumBoxes = 0; + int64_t nNumLines = 0; + + uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY; + MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent]; + MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext]; + + UI.nRangeBegin = 0; + UI.nRangeEnd = 0; + UI.nRangeBeginGpu = 0; + UI.nRangeEndGpu = 0; + UI.nRangeBeginIndex = UI.nRangeEndIndex = 0; + UI.pRangeLog = 0; + int64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; + int64_t nFrameStartGpu = pFrameCurrent->nFrameStartGpu; + int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu(); + int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu(); + float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu); + float fToMsGpu = MicroProfileTickToMsMultiplier(nTicksPerSecondGpu); + + float fDetailedOffset = UI.fDetailedOffset; + float fDetailedRange = UI.fDetailedRange; + + + int64_t nDetailedOffsetTicksCpu = MicroProfileMsToTick(fDetailedOffset, MicroProfileTicksPerSecondCpu()); + int64_t nDetailedOffsetTicksGpu = MicroProfileMsToTick(fDetailedOffset, MicroProfileTicksPerSecondGpu()); + int64_t nBaseTicksCpu = nDetailedOffsetTicksCpu 
+ nFrameStartCpu; + int64_t nBaseTicksGpu = nDetailedOffsetTicksGpu + nFrameStartGpu; + int64_t nBaseTicksEndCpu = nBaseTicksCpu + MicroProfileMsToTick(fDetailedRange, MicroProfileTicksPerSecondCpu()); + + int64_t nTickReferenceCpu = 0, nTickReferenceGpu = 0; + static int64_t nRefCpu = 0, nRefGpu = 0; + if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu)) + { + if(0 == nRefCpu || abs(nRefCpu-nBaseTicksCpu) > abs(nTickReferenceCpu-nBaseTicksCpu)) + { + nRefCpu = nTickReferenceCpu; + nRefGpu = nTickReferenceGpu; + } + else + { + nTickReferenceCpu = nRefCpu; + nTickReferenceGpu = nRefGpu; + } + nBaseTicksGpu = (nBaseTicksCpu - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu; + } + int64_t nBaseTicksEndGpu = nBaseTicksCpu + MicroProfileMsToTick(fDetailedRange, MicroProfileTicksPerSecondCpu()); + + MicroProfileFrameState* pFrameFirst = pFrameCurrent; + int64_t nGapTime = MicroProfileTicksPerSecondCpu() * MICROPROFILE_GAP_TIME / 1000; + for(uint32_t i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY; ++i) + { + uint32_t nNextIndex = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + pFrameFirst = &S.Frames[nNextIndex]; + if(pFrameFirst->nFrameStartCpu <= nBaseTicksCpu-nGapTime) + break; + } + + float fMsBase = fToMsCpu * nDetailedOffsetTicksCpu; + float fMs = fDetailedRange; + float fMsEnd = fMs + fMsBase; + float fWidth = (float)nWidth; + float fMsToScreen = fWidth / fMs; + + { + float fRate = floor(2*(log10(fMs)-1))/2; + float fStep = powf(10.f, fRate); + float fRcpStep = 1.f / fStep; + int nColorIndex = (int)(floor(fMsBase*fRcpStep)); + float fStart = floor(fMsBase*fRcpStep) * fStep; + for(float f = fStart; f < fMsEnd; ) + { + float fStart = f; + float fNext = f + fStep; + MicroProfileDrawBox(((fStart-fMsBase) * fMsToScreen), nBaseY, (fNext-fMsBase) * fMsToScreen+1, nBaseY + nHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[nColorIndex++ & 
1]); + f = fNext; + } + } + + nY += MICROPROFILE_TEXT_HEIGHT+1; + MicroProfileLogEntry* pMouseOver = UI.pDisplayMouseOver; + MicroProfileLogEntry* pMouseOverNext = 0; + uint64_t nMouseOverToken = pMouseOver ? MicroProfileLogTimerIndex(*pMouseOver) : MICROPROFILE_INVALID_TOKEN; + float fMouseX = (float)UI.nMouseX; + float fMouseY = (float)UI.nMouseY; + uint64_t nHoverToken = MICROPROFILE_INVALID_TOKEN; + int64_t nHoverTime = 0; + + static int nHoverCounter = 155; + static int nHoverCounterDelta = 10; + nHoverCounter += nHoverCounterDelta; + if(nHoverCounter >= 245) + nHoverCounterDelta = -10; + else if(nHoverCounter < 100) + nHoverCounterDelta = 10; + UI.nHoverColor = (nHoverCounter<<24)|(nHoverCounter<<16)|(nHoverCounter<<8)|nHoverCounter; + uint32_t nHoverCounterShared = nHoverCounter>>2; + UI.nHoverColorShared = (nHoverCounterShared<<24)|(nHoverCounterShared<<16)|(nHoverCounterShared<<8)|nHoverCounterShared; + + uint32_t nLinesDrawn[MICROPROFILE_STACK_MAX]={0}; + + uint32_t nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadAfter; + uint32_t nContextSwitchHoverThreadBefore = S.nContextSwitchHoverThreadBefore; + S.nContextSwitchHoverThread = S.nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadBefore = -1; + + uint32_t nContextSwitchStart = -1; + uint32_t nContextSwitchEnd = -1; + S.nContextSwitchHoverCpuNext = 0xff; + S.nContextSwitchHoverTickIn = -1; + S.nContextSwitchHoverTickOut = -1; + if(S.bContextSwitchRunning) + { + MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nBaseTicksCpu, nBaseTicksEndCpu); + } + + bool bSkipBarView = S.bContextSwitchRunning && S.bContextSwitchNoBars; + + if(!bSkipBarView) + { + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(!pLog) + continue; + + uint32_t nPut = pFrameNext->nLogStart[i]; + ///note: this may display new samples as old data, but this will only happen when + // unpaused, where the detailed view is hardly perceptible 
+ uint32_t nFront = S.Pool[i]->nPut.load(std::memory_order_relaxed); + MicroProfileFrameState* pFrameLogFirst = pFrameCurrent; + MicroProfileFrameState* pFrameLogLast = pFrameNext; + uint32_t nGet = pFrameLogFirst->nLogStart[i]; + do + { + MP_ASSERT(pFrameLogFirst >= &S.Frames[0] && pFrameLogFirst < &S.Frames[MICROPROFILE_MAX_FRAME_HISTORY]); + uint32_t nNewGet = pFrameLogFirst->nLogStart[i]; + bool bIsValid = false; + if(nPut < nFront) + { + bIsValid = nNewGet <= nPut || nNewGet >= nFront; + } + else + { + bIsValid = nNewGet <= nPut && nNewGet >= nFront; + } + if(bIsValid) + { + nGet = nNewGet; + pFrameLogFirst--; + if(pFrameLogFirst < &S.Frames[0]) + pFrameLogFirst = &S.Frames[MICROPROFILE_MAX_FRAME_HISTORY-1]; + } + else + { + break; + } + }while(pFrameLogFirst != pFrameFirst); + + + if(nGet == (uint32_t)-1) + continue; + MP_ASSERT(nGet != (uint32_t)-1); + + nPut = pFrameLogLast->nLogStart[i]; + + uint32_t nRange[2][2] = { {0, 0}, {0, 0}, }; + + MicroProfileGetRange(nPut, nGet, nRange); + if(nPut == nGet) + continue; + uint32_t nMaxStackDepth = 0; + + bool bGpu = pLog->nGpu != 0; + float fToMs = bGpu ? fToMsGpu : fToMsCpu; + int64_t nBaseTicks = bGpu ? 
nBaseTicksGpu : nBaseTicksCpu; + char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16]; + uint64_t nThreadId = pLog->nThreadId; + snprintf(ThreadName, sizeof(ThreadName)-1, "%04llx: %s", nThreadId, &pLog->ThreadName[0] ); + nY += 3; + uint32_t nThreadColor = -1; + if(pLog->nThreadId == nContextSwitchHoverThreadAfter || pLog->nThreadId == nContextSwitchHoverThreadBefore) + nThreadColor = UI.nHoverColorShared|0x906060; + MicroProfileDrawText(0, nY, nThreadColor, &ThreadName[0], (uint32_t)strlen(&ThreadName[0])); + nY += 3; + nY += MICROPROFILE_TEXT_HEIGHT + 1; + + if(S.bContextSwitchRunning) + { + MicroProfileDrawDetailedContextSwitchBars(nY, pLog->nThreadId, nContextSwitchStart, nContextSwitchEnd, nBaseTicks, nBaseY); + nY -= MICROPROFILE_DETAILED_BAR_HEIGHT; + nY += MICROPROFILE_DETAILED_CONTEXT_SWITCH_HEIGHT+1; + } + + uint32_t nYDelta = MICROPROFILE_DETAILED_BAR_HEIGHT; + uint32_t nStack[MICROPROFILE_STACK_MAX]; + uint32_t nStackPos = 0; + for(uint32_t j = 0; j < 2; ++j) + { + uint32_t nStart = nRange[j][0]; + uint32_t nEnd = nRange[j][1]; + for(uint32_t k = nStart; k < nEnd; ++k) + { + MicroProfileLogEntry* pEntry = pLog->Log + k; + int nType = MicroProfileLogType(*pEntry); + if(MP_LOG_ENTER == nType) + { + MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX); + nStack[nStackPos++] = k; + } + else if(MP_LOG_META == nType) + { + + } + else if(MP_LOG_LEAVE == nType) + { + if(0 == nStackPos) + { + continue; + } + + MicroProfileLogEntry* pEntryEnter = pLog->Log + nStack[nStackPos-1]; + if(MicroProfileLogTimerIndex(*pEntryEnter) != MicroProfileLogTimerIndex(*pEntry)) + { + //uprintf("mismatch %llx %llx\n", pEntryEnter->nToken, pEntry->nToken); + continue; + } + int64_t nTickStart = MicroProfileLogGetTick(*pEntryEnter); + int64_t nTickEnd = MicroProfileLogGetTick(*pEntry); + uint64_t nTimerIndex = MicroProfileLogTimerIndex(*pEntry); + uint32_t nColor = S.TimerInfo[nTimerIndex].nColor; + if(nMouseOverToken == nTimerIndex) + { + if(pEntry == pMouseOver) + { + nColor = 
UI.nHoverColor; + if(bGpu) + { + UI.nRangeBeginGpu = *pEntryEnter; + UI.nRangeEndGpu = *pEntry; + uint32_t nCpuBegin = (nStack[nStackPos-1] + 1) % MICROPROFILE_BUFFER_SIZE; + uint32_t nCpuEnd = (k + 1) % MICROPROFILE_BUFFER_SIZE; + MicroProfileLogEntry LogCpuBegin = pLog->Log[nCpuBegin]; + MicroProfileLogEntry LogCpuEnd = pLog->Log[nCpuEnd]; + if(MicroProfileLogType(LogCpuBegin)==3 && MicroProfileLogType(LogCpuEnd) == 3) + { + UI.nRangeBegin = LogCpuBegin; + UI.nRangeEnd = LogCpuEnd; + } + UI.nRangeBeginIndex = nStack[nStackPos-1]; + UI.nRangeEndIndex = k; + UI.pRangeLog = pLog; + } + else + { + UI.nRangeBegin = *pEntryEnter; + UI.nRangeEnd = *pEntry; + UI.nRangeBeginIndex = nStack[nStackPos-1]; + UI.nRangeEndIndex = k; + UI.pRangeLog = pLog; + + } + } + else + { + nColor = UI.nHoverColorShared; + } + } + + nMaxStackDepth = MicroProfileMax(nMaxStackDepth, nStackPos); + float fMsStart = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickStart); + float fMsEnd = fToMs * MicroProfileLogTickDifference(nBaseTicks, nTickEnd); + float fXStart = fMsStart * fMsToScreen; + float fXEnd = fMsEnd * fMsToScreen; + float fYStart = (float)(nY + nStackPos * nYDelta); + float fYEnd = fYStart + (MICROPROFILE_DETAILED_BAR_HEIGHT); + float fXDist = MicroProfileMax(fXStart - fMouseX, fMouseX - fXEnd); + bool bHover = fXDist < MICROPROFILE_HOVER_DIST && fYStart <= fMouseY && fMouseY <= fYEnd && nBaseY < fMouseY; + uint32_t nIntegerWidth = (uint32_t)(fXEnd - fXStart); + if(nIntegerWidth) + { + if(bHover && UI.nActiveMenu == -1) + { + nHoverToken = MicroProfileLogTimerIndex(*pEntry); + #if MICROPROFILE_DEBUG + UI.nHoverAddressEnter = (uint64_t)pEntryEnter; + UI.nHoverAddressLeave = (uint64_t)pEntry; + #endif + nHoverTime = MicroProfileLogTickDifference(nTickStart, nTickEnd); + pMouseOverNext = pEntry; + } + + MicroProfileDrawBox(fXStart, fYStart, fXEnd, fYEnd, nColor|UI.nOpacityForeground, MicroProfileBoxTypeBar); +#if MICROPROFILE_DETAILED_BAR_NAMES + 
if(nIntegerWidth>3*MICROPROFILE_TEXT_WIDTH) + { + float fXStartText = MicroProfileMax(fXStart, 0.f); + int nTextWidth = (int)(fXEnd - fXStartText); + int nCharacters = (nTextWidth - 2*MICROPROFILE_TEXT_WIDTH) / MICROPROFILE_TEXT_WIDTH; + if(nCharacters>0) + { + MicroProfileDrawText(fXStartText+1, fYStart+1, -1, S.TimerInfo[nTimerIndex].pName, MicroProfileMin<uint32_t>(S.TimerInfo[nTimerIndex].nNameLen, nCharacters)); + } + } +#endif + ++nNumBoxes; + } + else + { + float fXAvg = 0.5f * (fXStart + fXEnd); + int nLineX = (int)floor(fXAvg+0.5f); + if(nLineX != (int)nLinesDrawn[nStackPos]) + { + if(bHover && UI.nActiveMenu == -1) + { + nHoverToken = (uint32_t)MicroProfileLogTimerIndex(*pEntry); + nHoverTime = MicroProfileLogTickDifference(nTickStart, nTickEnd); + pMouseOverNext = pEntry; + } + nLinesDrawn[nStackPos] = nLineX; + MicroProfileDrawLineVertical(nLineX, fYStart + 0.5f, fYEnd + 0.5f, nColor|UI.nOpacityForeground); + ++nNumLines; + } + } + nStackPos--; + if(0 == nStackPos) + { + if(bGpu ? 
(nTickStart > nBaseTicksEndGpu) : (nTickStart > nBaseTicksEndCpu)) + { + break; + } + } + } + } + } + nY += nMaxStackDepth * nYDelta + MICROPROFILE_DETAILED_BAR_HEIGHT+1; + } + } + if(S.bContextSwitchRunning && (S.bContextSwitchAllThreads||S.bContextSwitchNoBars)) + { + uint32_t nNumThreads = 0; + uint32_t nThreads[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS]; + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS && S.Pool[i]; ++i) + nThreads[nNumThreads++] = S.Pool[i]->nThreadId; + uint32_t nNumThreadsBase = nNumThreads; + if(S.bContextSwitchAllThreads) + { + for(uint32_t i = nContextSwitchStart; i != nContextSwitchEnd; i = (i+1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE) + { + MicroProfileContextSwitch CS = S.ContextSwitch[i]; + ThreadIdType nThreadId = CS.nThreadIn; + if(nThreadId) + { + bool bSeen = false; + for(uint32_t j = 0; j < nNumThreads; ++j) + { + if(nThreads[j] == nThreadId) + { + bSeen = true; + break; + } + } + if(!bSeen) + { + nThreads[nNumThreads++] = nThreadId; + } + } + if(nNumThreads == MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS) + { + S.nOverflow = 10; + break; + } + } + std::sort(&nThreads[nNumThreadsBase], &nThreads[nNumThreads]); + } + uint32_t nStart = nNumThreadsBase; + if(S.bContextSwitchNoBars) + nStart = 0; + for(uint32_t i = nStart; i < nNumThreads; ++i) + { + ThreadIdType nThreadId = nThreads[i]; + if(nThreadId) + { + char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16]; + const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " "; + + int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? 
&S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) ); + uint32_t nThreadColor = -1; + if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore) + nThreadColor = UI.nHoverColorShared|0x906060; + MicroProfileDrawDetailedContextSwitchBars(nY+2, nThreadId, nContextSwitchStart, nContextSwitchEnd, nBaseTicksCpu, nBaseY); + MicroProfileDrawText(0, nY, nThreadColor, &ThreadName[0], nStrLen); + nY += MICROPROFILE_TEXT_HEIGHT+1; + } + } + } + + S.nContextSwitchHoverCpu = S.nContextSwitchHoverCpuNext; + + + + + UI.pDisplayMouseOver = pMouseOverNext; + + if(!S.nRunning) + { + if(nHoverToken != MICROPROFILE_INVALID_TOKEN && nHoverTime) + { + UI.nHoverToken = nHoverToken; + UI.nHoverTime = nHoverTime; + } + + if(nSelectedFrame != -1) + { + UI.nRangeBegin = S.Frames[nSelectedFrame].nFrameStartCpu; + UI.nRangeEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu; + UI.nRangeBeginGpu = S.Frames[nSelectedFrame].nFrameStartGpu; + UI.nRangeEndGpu = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartGpu; + } + if(UI.nRangeBegin != UI.nRangeEnd) + { + float fMsStart = fToMsCpu * MicroProfileLogTickDifference(nBaseTicksCpu, UI.nRangeBegin); + float fMsEnd = fToMsCpu * MicroProfileLogTickDifference(nBaseTicksCpu, UI.nRangeEnd); + float fXStart = fMsStart * fMsToScreen; + float fXEnd = fMsEnd * fMsToScreen; + MicroProfileDrawBox(fXStart, nBaseY, fXEnd, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat); + MicroProfileDrawLineVertical(fXStart, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT | 0x44000000); + MicroProfileDrawLineVertical(fXEnd, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT | 0x44000000); + + fMsStart += fDetailedOffset; + fMsEnd += fDetailedOffset; + char sBuffer[32]; + uint32_t nLenStart = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsStart); + float fStartTextWidth = (float)((1+MICROPROFILE_TEXT_WIDTH) * nLenStart); + float 
fStartTextX = fXStart - fStartTextWidth - 2; + MicroProfileDrawBox(fStartTextX, nBaseY, fStartTextX + fStartTextWidth + 2, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat); + MicroProfileDrawText(fStartTextX+1, nBaseY, (uint32_t)-1, sBuffer, nLenStart); + uint32_t nLenEnd = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsEnd); + MicroProfileDrawBox(fXEnd+1, nBaseY, fXEnd+1+(1+MICROPROFILE_TEXT_WIDTH) * nLenEnd + 3, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat); + MicroProfileDrawText(fXEnd+2, nBaseY+1, (uint32_t)-1, sBuffer, nLenEnd); + + if(UI.nMouseRight) + { + MicroProfileZoomTo(UI.nRangeBegin, UI.nRangeEnd); + } + } + + if(UI.nRangeBeginGpu != UI.nRangeEndGpu) + { + float fMsStart = fToMsGpu * MicroProfileLogTickDifference(nBaseTicksGpu, UI.nRangeBeginGpu); + float fMsEnd = fToMsGpu * MicroProfileLogTickDifference(nBaseTicksGpu, UI.nRangeEndGpu); + float fXStart = fMsStart * fMsToScreen; + float fXEnd = fMsEnd * fMsToScreen; + MicroProfileDrawBox(fXStart, nBaseY, fXEnd, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU, MicroProfileBoxTypeFlat); + MicroProfileDrawLineVertical(fXStart, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU | 0x44000000); + MicroProfileDrawLineVertical(fXEnd, nBaseY, nHeight, MICROPROFILE_FRAME_COLOR_HIGHTLIGHT_GPU | 0x44000000); + + nBaseY += MICROPROFILE_TEXT_HEIGHT+1; + + fMsStart += fDetailedOffset; + fMsEnd += fDetailedOffset; + char sBuffer[32]; + uint32_t nLenStart = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsStart); + float fStartTextWidth = (float)((1+MICROPROFILE_TEXT_WIDTH) * nLenStart); + float fStartTextX = fXStart - fStartTextWidth - 2; + MicroProfileDrawBox(fStartTextX, nBaseY, fStartTextX + fStartTextWidth + 2, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat); + MicroProfileDrawText(fStartTextX+1, nBaseY, (uint32_t)-1, sBuffer, nLenStart); + uint32_t nLenEnd = snprintf(sBuffer, sizeof(sBuffer)-1, "%.2fms", fMsEnd); + 
MicroProfileDrawBox(fXEnd+1, nBaseY, fXEnd+1+(1+MICROPROFILE_TEXT_WIDTH) * nLenEnd + 3, MICROPROFILE_TEXT_HEIGHT + 2 + nBaseY, 0x33000000, MicroProfileBoxTypeFlat); + MicroProfileDrawText(fXEnd+2, nBaseY+1, (uint32_t)-1, sBuffer, nLenEnd); + } + } +} + + +void MicroProfileDrawDetailedFrameHistory(uint32_t nWidth, uint32_t nHeight, uint32_t nBaseY, uint32_t nSelectedFrame) +{ + MicroProfile& S = *MicroProfileGet(); + + const uint32_t nBarHeight = MICROPROFILE_FRAME_HISTORY_HEIGHT; + float fBaseX = (float)nWidth; + float fDx = fBaseX / MICROPROFILE_NUM_FRAMES; + + uint32_t nLastIndex = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY; + MicroProfileDrawBox(0, nBaseY, nWidth, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, 0xff000000 | g_nMicroProfileBackColors[0], MicroProfileBoxTypeFlat); + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * S.fRcpReferenceTime; + float fToMsGpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()) * S.fRcpReferenceTime; + + + MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent]; + uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; + int64_t nDetailedOffsetTicksCpu = MicroProfileMsToTick(UI.fDetailedOffset, MicroProfileTicksPerSecondCpu()); + int64_t nCpuStart = nDetailedOffsetTicksCpu + nFrameStartCpu; + int64_t nCpuEnd = nCpuStart + MicroProfileMsToTick(UI.fDetailedRange, MicroProfileTicksPerSecondCpu());; + + + float fSelectionStart = (float)nWidth; + float fSelectionEnd = 0.f; + for(uint32_t i = 0; i < MICROPROFILE_NUM_FRAMES; ++i) + { + uint32_t nIndex = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY; + MicroProfileFrameState* pCurrent = &S.Frames[nIndex]; + MicroProfileFrameState* pNext = &S.Frames[nLastIndex]; + + int64_t nTicks = pNext->nFrameStartCpu - pCurrent->nFrameStartCpu; + int64_t nTicksGpu = pNext->nFrameStartGpu - pCurrent->nFrameStartGpu; + float fScale = fToMs * nTicks; + float fScaleGpu = fToMsGpu * nTicksGpu; + 
fScale = fScale > 1.f ? 0.f : 1.f - fScale; + fScaleGpu = fScaleGpu > 1.f ? 0.f : 1.f - fScaleGpu; + float fXEnd = fBaseX; + float fXStart = fBaseX - fDx; + fBaseX = fXStart; + uint32_t nColor = MICROPROFILE_FRAME_HISTORY_COLOR_CPU; + if(nIndex == nSelectedFrame) + nColor = (uint32_t)-1; + MicroProfileDrawBox(fXStart, nBaseY + fScale * nBarHeight, fXEnd, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, nColor, MicroProfileBoxTypeBar); + if(pNext->nFrameStartCpu > nCpuStart) + { + fSelectionStart = fXStart; + } + if(pCurrent->nFrameStartCpu < nCpuEnd && fSelectionEnd == 0.f) + { + fSelectionEnd = fXEnd; + } + nLastIndex = nIndex; + } + MicroProfileDrawBox(fSelectionStart, nBaseY, fSelectionEnd, nBaseY+MICROPROFILE_FRAME_HISTORY_HEIGHT, MICROPROFILE_FRAME_HISTORY_COLOR_HIGHTLIGHT, MicroProfileBoxTypeFlat); +} +void MicroProfileDrawDetailedView(uint32_t nWidth, uint32_t nHeight) +{ + MicroProfile& S = *MicroProfileGet(); + + MICROPROFILE_SCOPE(g_MicroProfileDetailed); + uint32_t nBaseY = MICROPROFILE_TEXT_HEIGHT + 1; + + int nSelectedFrame = -1; + if(UI.nMouseY > nBaseY && UI.nMouseY <= nBaseY + MICROPROFILE_FRAME_HISTORY_HEIGHT && UI.nActiveMenu == -1) + { + + nSelectedFrame = ((MICROPROFILE_NUM_FRAMES) * (UI.nWidth-UI.nMouseX) / UI.nWidth); + nSelectedFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nSelectedFrame) % MICROPROFILE_MAX_FRAME_HISTORY; + UI.nHoverFrame = nSelectedFrame; + if(UI.nMouseRight) + { + int64_t nRangeBegin = S.Frames[nSelectedFrame].nFrameStartCpu; + int64_t nRangeEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu; + MicroProfileZoomTo(nRangeBegin, nRangeEnd); + } + if(UI.nMouseDownLeft) + { + uint64_t nFrac = (1024 * (MICROPROFILE_NUM_FRAMES) * (UI.nMouseX) / UI.nWidth) % 1024; + int64_t nRangeBegin = S.Frames[nSelectedFrame].nFrameStartCpu; + int64_t nRangeEnd = S.Frames[(nSelectedFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu; + MicroProfileCenter(nRangeBegin + (nRangeEnd-nRangeBegin) * nFrac / 
1024); + } + } + else + { + UI.nHoverFrame = -1; + } + + MicroProfileDrawDetailedBars(nWidth, nHeight, nBaseY + MICROPROFILE_FRAME_HISTORY_HEIGHT, nSelectedFrame); + MicroProfileDrawDetailedFrameHistory(nWidth, nHeight, nBaseY, nSelectedFrame); +} + +void MicroProfileDrawTextRight(uint32_t nX, uint32_t nY, uint32_t nColor, const char* pStr, uint32_t nStrLen) +{ + MicroProfileDrawText(nX - nStrLen * (MICROPROFILE_TEXT_WIDTH+1), nY, nColor, pStr, nStrLen); +} +void MicroProfileDrawHeader(int32_t nX, uint32_t nWidth, const char* pName) +{ + if(pName) + { + MicroProfileDrawBox(nX-8, MICROPROFILE_TEXT_HEIGHT + 2, nX + nWidth+5, MICROPROFILE_TEXT_HEIGHT + 2 + (MICROPROFILE_TEXT_HEIGHT+1), 0xff000000|g_nMicroProfileBackColors[1]); + MicroProfileDrawText(nX, MICROPROFILE_TEXT_HEIGHT + 2, (uint32_t)-1, pName, (uint32_t)strlen(pName)); + } +} + + +typedef void (*MicroProfileLoopGroupCallback)(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pData); + +void MicroProfileLoopActiveGroupsDraw(int32_t nX, int32_t nY, const char* pName, MicroProfileLoopGroupCallback CB, void* pData) +{ + MicroProfile& S = *MicroProfileGet(); + nY += MICROPROFILE_TEXT_HEIGHT + 2; + uint64_t nGroup = S.nAllGroupsWanted ? 
S.nGroupMask : S.nActiveGroupWanted; + uint32_t nCount = 0; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + uint64_t nMask = 1ll << j; + if(nMask & nGroup) + { + nY += MICROPROFILE_TEXT_HEIGHT + 1; + for(uint32_t i = 0; i < S.nTotalTimers;++i) + { + uint64_t nTokenMask = MicroProfileGetGroupMask(S.TimerInfo[i].nToken); + if(nTokenMask & nMask) + { + if(nY >= 0) + CB(i, nCount, nMask, nX, nY, pData); + + nCount += 2; + nY += MICROPROFILE_TEXT_HEIGHT + 1; + + if(nY > (int)UI.nHeight) + return; + } + } + + } + } +} + + +void MicroProfileCalcTimers(float* pTimers, float* pAverage, float* pMax, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, uint64_t nGroup, uint32_t nSize) +{ + MicroProfile& S = *MicroProfileGet(); + + uint32_t nCount = 0; + uint64_t nMask = 1; + + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + if(nMask & nGroup) + { + const float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + for(uint32_t i = 0; i < S.nTotalTimers;++i) + { + uint64_t nTokenMask = MicroProfileGetGroupMask(S.TimerInfo[i].nToken); + if(nTokenMask & nMask) + { + { + uint32_t nTimer = i; + uint32_t nIdx = nCount; + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + uint32_t nAggregateCount = S.Aggregate[nTimer].nCount ? 
S.Aggregate[nTimer].nCount : 1; + float fToPrc = S.fRcpReferenceTime; + float fMs = fToMs * (S.Frame[nTimer].nTicks); + float fPrc = MicroProfileMin(fMs * fToPrc, 1.f); + float fAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateFrames); + float fAveragePrc = MicroProfileMin(fAverageMs * fToPrc, 1.f); + float fMaxMs = fToMs * (S.AggregateMax[nTimer]); + float fMaxPrc = MicroProfileMin(fMaxMs * fToPrc, 1.f); + float fCallAverageMs = fToMs * (S.Aggregate[nTimer].nTicks / nAggregateCount); + float fCallAveragePrc = MicroProfileMin(fCallAverageMs * fToPrc, 1.f); + float fMsExclusive = fToMs * (S.FrameExclusive[nTimer]); + float fPrcExclusive = MicroProfileMin(fMsExclusive * fToPrc, 1.f); + float fAverageMsExclusive = fToMs * (S.AggregateExclusive[nTimer] / nAggregateFrames); + float fAveragePrcExclusive = MicroProfileMin(fAverageMsExclusive * fToPrc, 1.f); + float fMaxMsExclusive = fToMs * (S.AggregateMaxExclusive[nTimer]); + float fMaxPrcExclusive = MicroProfileMin(fMaxMsExclusive * fToPrc, 1.f); + pTimers[nIdx] = fMs; + pTimers[nIdx+1] = fPrc; + pAverage[nIdx] = fAverageMs; + pAverage[nIdx+1] = fAveragePrc; + pMax[nIdx] = fMaxMs; + pMax[nIdx+1] = fMaxPrc; + pCallAverage[nIdx] = fCallAverageMs; + pCallAverage[nIdx+1] = fCallAveragePrc; + pExclusive[nIdx] = fMsExclusive; + pExclusive[nIdx+1] = fPrcExclusive; + pAverageExclusive[nIdx] = fAverageMsExclusive; + pAverageExclusive[nIdx+1] = fAveragePrcExclusive; + pMaxExclusive[nIdx] = fMaxMsExclusive; + pMaxExclusive[nIdx+1] = fMaxPrcExclusive; + } + nCount += 2; + } + } + } + nMask <<= 1ll; + } +} + +#define SBUF_MAX 32 + +void MicroProfileDrawBarArrayCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra) +{ + const uint32_t nHeight = MICROPROFILE_TEXT_HEIGHT; + const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH); + const float fWidth = (float)MICROPROFILE_BAR_WIDTH; + + float* pTimers = ((float**)pExtra)[0]; + float* pTimers2 = ((float**)pExtra)[1]; + 
MicroProfile& S = *MicroProfileGet(); + char sBuffer[SBUF_MAX]; + if (pTimers2 && pTimers2[nIdx] > 0.1f) + snprintf(sBuffer, SBUF_MAX-1, "%5.2f %3.1fx", pTimers[nIdx], pTimers[nIdx] / pTimers2[nIdx]); + else + snprintf(sBuffer, SBUF_MAX-1, "%5.2f", pTimers[nIdx]); + if (!pTimers2) + MicroProfileDrawBox(nX + nTextWidth, nY, nX + nTextWidth + fWidth * pTimers[nIdx+1], nY + nHeight, UI.nOpacityForeground|S.TimerInfo[nTimer].nColor, MicroProfileBoxTypeBar); + MicroProfileDrawText(nX, nY, (uint32_t)-1, sBuffer, (uint32_t)strlen(sBuffer)); +} + + +uint32_t MicroProfileDrawBarArray(int32_t nX, int32_t nY, float* pTimers, const char* pName, uint32_t nTotalHeight, float* pTimers2 = NULL) +{ + const uint32_t nTextWidth = 6 * (1+MICROPROFILE_TEXT_WIDTH); + const uint32_t nWidth = MICROPROFILE_BAR_WIDTH; + + MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); + float* pTimersArray[2] = {pTimers, pTimers2}; + MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarArrayCallback, pTimersArray); + MicroProfileDrawHeader(nX, nTextWidth + nWidth, pName); + return nWidth + 5 + nTextWidth; + +} +void MicroProfileDrawBarCallCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra) +{ + MicroProfile& S = *MicroProfileGet(); + char sBuffer[SBUF_MAX]; + int nLen = snprintf(sBuffer, SBUF_MAX-1, "%5d", S.Frame[nTimer].nCount);//fix + MicroProfileDrawText(nX, nY, (uint32_t)-1, sBuffer, nLen); +} + +uint32_t MicroProfileDrawBarCallCount(int32_t nX, int32_t nY, const char* pName) +{ + MicroProfileLoopActiveGroupsDraw(nX, nY, pName, MicroProfileDrawBarCallCountCallback, 0); + const uint32_t nTextWidth = 6 * MICROPROFILE_TEXT_WIDTH; + MicroProfileDrawHeader(nX, 5 + nTextWidth, pName); + return 5 + nTextWidth; +} + +struct MicroProfileMetaAverageArgs +{ + uint64_t* pCounters; + float fRcpFrames; +}; + +void 
MicroProfileDrawBarMetaAverageCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra) +{ + MicroProfileMetaAverageArgs* pArgs = (MicroProfileMetaAverageArgs*)pExtra; + uint64_t* pCounters = pArgs->pCounters; + float fRcpFrames = pArgs->fRcpFrames; + char sBuffer[SBUF_MAX]; + int nLen = snprintf(sBuffer, SBUF_MAX-1, "%5.2f", pCounters[nTimer] * fRcpFrames); + MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen); +} + +uint32_t MicroProfileDrawBarMetaAverage(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight) +{ + if(!pName) + return 0; + MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); + uint32_t nTextWidth = (1+MICROPROFILE_TEXT_WIDTH) * MicroProfileMax<uint32_t>(6, (uint32_t)strlen(pName)); + float fRcpFrames = 1.f / (MicroProfileGet()->nAggregateFrames ? MicroProfileGet()->nAggregateFrames : 1); + MicroProfileMetaAverageArgs Args = {pCounters, fRcpFrames}; + MicroProfileLoopActiveGroupsDraw(nX + nTextWidth, nY, pName, MicroProfileDrawBarMetaAverageCallback, &Args); + MicroProfileDrawHeader(nX, 5 + nTextWidth, pName); + return 5 + nTextWidth; +} + + +void MicroProfileDrawBarMetaCountCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra) +{ + uint64_t* pCounters = (uint64_t*)pExtra; + char sBuffer[SBUF_MAX]; + int nLen = snprintf(sBuffer, SBUF_MAX-1, "%5llu", pCounters[nTimer]); + MicroProfileDrawText(nX - nLen * (MICROPROFILE_TEXT_WIDTH+1), nY, (uint32_t)-1, sBuffer, nLen); +} + +uint32_t MicroProfileDrawBarMetaCount(int32_t nX, int32_t nY, uint64_t* pCounters, const char* pName, uint32_t nTotalHeight) +{ + if(!pName) + return 0; + + MicroProfileDrawLineVertical(nX-5, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); + uint32_t nTextWidth = 
(1+MICROPROFILE_TEXT_WIDTH) * MicroProfileMax<uint32_t>(6, (uint32_t)strlen(pName)); + MicroProfileLoopActiveGroupsDraw(nX + nTextWidth, nY, pName, MicroProfileDrawBarMetaCountCallback, pCounters); + MicroProfileDrawHeader(nX, 5 + nTextWidth, pName); + return 5 + nTextWidth; +} + +void MicroProfileDrawBarLegendCallback(uint32_t nTimer, uint32_t nIdx, uint64_t nGroupMask, uint32_t nX, uint32_t nY, void* pExtra) +{ + MicroProfile& S = *MicroProfileGet(); + if (S.TimerInfo[nTimer].bGraph) + { + MicroProfileDrawText(nX, nY, S.TimerInfo[nTimer].nColor, ">", 1); + } + MicroProfileDrawTextRight(nX, nY, S.TimerInfo[nTimer].nColor, S.TimerInfo[nTimer].pName, (uint32_t)strlen(S.TimerInfo[nTimer].pName)); + if(UI.nMouseY >= nY && UI.nMouseY < nY + MICROPROFILE_TEXT_HEIGHT+1) + { + UI.nHoverToken = nTimer; + UI.nHoverTime = 0; + } +} + +uint32_t MicroProfileDrawBarLegend(int32_t nX, int32_t nY, uint32_t nTotalHeight, uint32_t nMaxWidth) +{ + MicroProfileDrawLineVertical(nX-5, nY, nTotalHeight, UI.nOpacityBackground | g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); + MicroProfileLoopActiveGroupsDraw(nMaxWidth, nY, 0, MicroProfileDrawBarLegendCallback, 0); + return nX; +} + +bool MicroProfileDrawGraph(uint32_t nScreenWidth, uint32_t nScreenHeight) +{ + MicroProfile& S = *MicroProfileGet(); + + MICROPROFILE_SCOPE(g_MicroProfileDrawGraph); + bool bEnabled = false; + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN) + bEnabled = true; + if(!bEnabled) + return false; + + uint32_t nX = nScreenWidth - MICROPROFILE_GRAPH_WIDTH; + uint32_t nY = nScreenHeight - MICROPROFILE_GRAPH_HEIGHT; + MicroProfileDrawBox(nX, nY, nX + MICROPROFILE_GRAPH_WIDTH, nY + MICROPROFILE_GRAPH_HEIGHT, 0x88000000 | g_nMicroProfileBackColors[0]); + bool bMouseOver = UI.nMouseX >= nX && UI.nMouseY >= nY; + float fMouseXPrc =(float(UI.nMouseX - nX)) / MICROPROFILE_GRAPH_WIDTH; + if(bMouseOver) + { + float fXAvg = fMouseXPrc * 
MICROPROFILE_GRAPH_WIDTH + nX; + MicroProfileDrawLineVertical(fXAvg, nY, nY + MICROPROFILE_GRAPH_HEIGHT, (uint32_t)-1); + } + + + float fY = (float)nScreenHeight; + float fDX = MICROPROFILE_GRAPH_WIDTH * 1.f / MICROPROFILE_GRAPH_HISTORY; + float fDY = MICROPROFILE_GRAPH_HEIGHT; + uint32_t nPut = S.nGraphPut; + float* pGraphData = (float*)alloca(sizeof(float)* MICROPROFILE_GRAPH_HISTORY*2); + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN) + { + uint32_t nGroupId = MicroProfileGetGroupIndex(S.Graph[i].nToken); + bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu; + float fToMs = MicroProfileTickToMsMultiplier(bGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + float fToPrc = fToMs * S.fRcpReferenceTime * 3 / 4; + + float fX = (float)nX; + for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j) + { + float fWeigth = MicroProfileMin(fToPrc * (S.Graph[i].nHistory[(j+nPut)%MICROPROFILE_GRAPH_HISTORY]), 1.f); + pGraphData[(j*2)] = fX; + pGraphData[(j*2)+1] = fY - fDY * fWeigth; + fX += fDX; + } + MicroProfileDrawLine2D(MICROPROFILE_GRAPH_HISTORY, pGraphData, S.TimerInfo[MicroProfileGetTimerIndex(S.Graph[i].nToken)].nColor); + } + } + { + float fY1 = 0.25f * MICROPROFILE_GRAPH_HEIGHT + nY; + float fY2 = 0.50f * MICROPROFILE_GRAPH_HEIGHT + nY; + float fY3 = 0.75f * MICROPROFILE_GRAPH_HEIGHT + nY; + MicroProfileDrawLineHorizontal(nX, nX + MICROPROFILE_GRAPH_WIDTH, fY1, 0xffdd4444); + MicroProfileDrawLineHorizontal(nX, nX + MICROPROFILE_GRAPH_WIDTH, fY2, 0xff000000| g_nMicroProfileBackColors[0]); + MicroProfileDrawLineHorizontal(nX, nX + MICROPROFILE_GRAPH_WIDTH, fY3, 0xff000000|g_nMicroProfileBackColors[0]); + + char buf[32]; + int nLen = snprintf(buf, sizeof(buf)-1, "%5.2fms", S.fReferenceTime); + MicroProfileDrawText(nX+1, fY1 - (2+MICROPROFILE_TEXT_HEIGHT), (uint32_t)-1, buf, nLen); + } + + + + if(bMouseOver) + { + uint32_t pColors[MICROPROFILE_MAX_GRAPHS]; + 
MicroProfileStringArray Strings; + MicroProfileStringArrayClear(&Strings); + uint32_t nTextCount = 0; + uint32_t nGraphIndex = (S.nGraphPut + MICROPROFILE_GRAPH_HISTORY - int(MICROPROFILE_GRAPH_HISTORY*(1.f - fMouseXPrc))) % MICROPROFILE_GRAPH_HISTORY; + + uint32_t nX = UI.nMouseX; + uint32_t nY = UI.nMouseY + 20; + + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN) + { + uint32_t nGroupId = MicroProfileGetGroupIndex(S.Graph[i].nToken); + bool bGpu = S.GroupInfo[nGroupId].Type == MicroProfileTokenTypeGpu; + float fToMs = MicroProfileTickToMsMultiplier(bGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + uint32_t nIndex = MicroProfileGetTimerIndex(S.Graph[i].nToken); + uint32_t nColor = S.TimerInfo[nIndex].nColor; + const char* pName = S.TimerInfo[nIndex].pName; + pColors[nTextCount++] = nColor; + MicroProfileStringArrayAddLiteral(&Strings, pName); + MicroProfileStringArrayFormat(&Strings, "%5.2fms", fToMs * (S.Graph[i].nHistory[nGraphIndex])); + } + } + if(nTextCount) + { + MicroProfileDrawFloatWindow(nX, nY, Strings.ppStrings, Strings.nNumStrings, 0, pColors); + } + + if(UI.nMouseRight) + { + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + } + } + } + + return bMouseOver; +} + +void MicroProfileDumpTimers() +{ + MicroProfile& S = *MicroProfileGet(); + + uint64_t nActiveGroup = S.nGroupMask; + + uint32_t nNumTimers = S.nTotalTimers; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 7 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + MicroProfileCalcTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, 
pAverageExclusive, pMaxExclusive, nActiveGroup, nNumTimers); + + MICROPROFILE_PRINTF("%11s, ", "Time"); + MICROPROFILE_PRINTF("%11s, ", "Average"); + MICROPROFILE_PRINTF("%11s, ", "Max"); + MICROPROFILE_PRINTF("%11s, ", "Call Avg"); + MICROPROFILE_PRINTF("%9s, ", "Count"); + MICROPROFILE_PRINTF("%11s, ", "Excl"); + MICROPROFILE_PRINTF("%11s, ", "Avg Excl"); + MICROPROFILE_PRINTF("%11s, \n", "Max Excl"); + + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + uint64_t nMask = 1ll << j; + if(nMask & nActiveGroup) + { + MICROPROFILE_PRINTF("%s\n", S.GroupInfo[j].pName); + for(uint32_t i = 0; i < S.nTotalTimers;++i) + { + uint64_t nTokenMask = MicroProfileGetGroupMask(S.TimerInfo[i].nToken); + if(nTokenMask & nMask) + { + uint32_t nIdx = i * 2; + MICROPROFILE_PRINTF("%9.2fms, ", pTimers[nIdx]); + MICROPROFILE_PRINTF("%9.2fms, ", pAverage[nIdx]); + MICROPROFILE_PRINTF("%9.2fms, ", pMax[nIdx]); + MICROPROFILE_PRINTF("%9.2fms, ", pCallAverage[nIdx]); + MICROPROFILE_PRINTF("%9d, ", S.Frame[i].nCount); + MICROPROFILE_PRINTF("%9.2fms, ", pTimersExclusive[nIdx]); + MICROPROFILE_PRINTF("%9.2fms, ", pAverageExclusive[nIdx]); + MICROPROFILE_PRINTF("%9.2fms, ", pMaxExclusive[nIdx]); + MICROPROFILE_PRINTF("%s\n", S.TimerInfo[i].pName); + } + } + } + } +} + +void MicroProfileDrawBarView(uint32_t nScreenWidth, uint32_t nScreenHeight) +{ + MicroProfile& S = *MicroProfileGet(); + + uint64_t nActiveGroup = S.nAllGroupsWanted ? 
S.nGroupMask : S.nActiveGroupWanted; + if(!nActiveGroup) + return; + MICROPROFILE_SCOPE(g_MicroProfileDrawBarView); + + const uint32_t nHeight = MICROPROFILE_TEXT_HEIGHT; + int nColorIndex = 0; + uint32_t nMaxTimerNameLen = 1; + uint32_t nNumTimers = 0; + uint32_t nNumGroups = 0; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + if(nActiveGroup & (1ll << j)) + { + nNumTimers += S.GroupInfo[j].nNumTimers; + nNumGroups += 1; + nMaxTimerNameLen = MicroProfileMax(nMaxTimerNameLen, S.GroupInfo[j].nMaxTimerNameLen); + } + } + uint32_t nTimerWidth = 2+(4+nMaxTimerNameLen) * (MICROPROFILE_TEXT_WIDTH+1); + uint32_t nX = nTimerWidth + UI.nOffsetX; + uint32_t nY = nHeight + 3 - UI.nOffsetY; + uint32_t nBlockSize = 2 * nNumTimers; + float* pTimers = (float*)alloca(nBlockSize * 7 * sizeof(float)); + float* pAverage = pTimers + nBlockSize; + float* pMax = pTimers + 2 * nBlockSize; + float* pCallAverage = pTimers + 3 * nBlockSize; + float* pTimersExclusive = pTimers + 4 * nBlockSize; + float* pAverageExclusive = pTimers + 5 * nBlockSize; + float* pMaxExclusive = pTimers + 6 * nBlockSize; + MicroProfileCalcTimers(pTimers, pAverage, pMax, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, nActiveGroup, nNumTimers); + uint32_t nWidth = 0; + { + uint32_t nMetaIndex = 0; + for(uint32_t i = 1; i ; i <<= 1) + { + if(S.nBars & i) + { + if(i >= MP_DRAW_META_FIRST) + { + if(nMetaIndex < MICROPROFILE_META_MAX && S.MetaCounters[nMetaIndex].pName) + { + uint32_t nStrWidth = strlen(S.MetaCounters[nMetaIndex].pName); + if(S.nBars & MP_DRAW_TIMERS) + nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth); + if(S.nBars & MP_DRAW_AVERAGE) + nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth + 4); + if(S.nBars & MP_DRAW_MAX) + nWidth += 6 + (1+MICROPROFILE_TEXT_WIDTH) * (nStrWidth + 4); + } + } + else + { + nWidth += MICROPROFILE_BAR_WIDTH + 6 + 6 * (1+MICROPROFILE_TEXT_WIDTH); + if(i & MP_DRAW_CALL_COUNT) + nWidth += 6 + 6 * MICROPROFILE_TEXT_WIDTH; + } + } + if(i 
>= MP_DRAW_META_FIRST) + { + ++nMetaIndex; + } + } + nWidth += (1+nMaxTimerNameLen) * (MICROPROFILE_TEXT_WIDTH+1); + for(uint32_t i = 0; i < nNumTimers+nNumGroups+1; ++i) + { + uint32_t nY0 = nY + i * (nHeight + 1); + bool bInside = (UI.nActiveMenu == -1) && ((UI.nMouseY >= nY0) && (UI.nMouseY < (nY0 + nHeight + 1))); + MicroProfileDrawBox(nX, nY0, nWidth+nX, nY0 + (nHeight+1)+1, UI.nOpacityBackground | (g_nMicroProfileBackColors[nColorIndex++ & 1] + ((bInside) ? 0x002c2c2c : 0))); + } + nX += 10; + } + int nTotalHeight = (nNumTimers+nNumGroups+1) * (nHeight+1); + uint32_t nLegendOffset = 1; + if(S.nBars & MP_DRAW_TIMERS) + nX += MicroProfileDrawBarArray(nX, nY, pTimers, "Time", nTotalHeight) + 1; + if(S.nBars & MP_DRAW_AVERAGE) + nX += MicroProfileDrawBarArray(nX, nY, pAverage, "Average", nTotalHeight) + 1; + if(S.nBars & MP_DRAW_MAX) + nX += MicroProfileDrawBarArray(nX, nY, pMax, (!UI.bShowSpikes) ? "Max Time" : "Max Time, Spike", nTotalHeight, UI.bShowSpikes ? pAverage : NULL) + 1; + if(S.nBars & MP_DRAW_CALL_COUNT) + { + nX += MicroProfileDrawBarArray(nX, nY, pCallAverage, "Call Average", nTotalHeight) + 1; + nX += MicroProfileDrawBarCallCount(nX, nY, "Count") + 1; + } + if(S.nBars & MP_DRAW_TIMERS_EXCLUSIVE) + nX += MicroProfileDrawBarArray(nX, nY, pTimersExclusive, "Exclusive Time", nTotalHeight) + 1; + if(S.nBars & MP_DRAW_AVERAGE_EXCLUSIVE) + nX += MicroProfileDrawBarArray(nX, nY, pAverageExclusive, "Exclusive Average", nTotalHeight) + 1; + if(S.nBars & MP_DRAW_MAX_EXCLUSIVE) + nX += MicroProfileDrawBarArray(nX, nY, pMaxExclusive, (!UI.bShowSpikes) ? "Exclusive Max Time" :"Excl Max Time, Spike", nTotalHeight, UI.bShowSpikes ? 
pAverageExclusive : NULL) + 1; + + for(int i = 0; i < MICROPROFILE_META_MAX; ++i) + { + if(0 != (S.nBars & (MP_DRAW_META_FIRST<<i)) && S.MetaCounters[i].pName) + { + uint32_t nBufferSize = strlen(S.MetaCounters[i].pName) + 32; + char* buffer = (char*)alloca(nBufferSize); + if(S.nBars & MP_DRAW_TIMERS) + nX += MicroProfileDrawBarMetaCount(nX, nY, &S.MetaCounters[i].nCounters[0], S.MetaCounters[i].pName, nTotalHeight) + 1; + if(S.nBars & MP_DRAW_AVERAGE) + { + snprintf(buffer, nBufferSize-1, "%s Avg", S.MetaCounters[i].pName); + nX += MicroProfileDrawBarMetaAverage(nX, nY, &S.MetaCounters[i].nAggregate[0], buffer, nTotalHeight) + 1; + } + if(S.nBars & MP_DRAW_MAX) + { + snprintf(buffer, nBufferSize-1, "%s Max", S.MetaCounters[i].pName); + nX += MicroProfileDrawBarMetaCount(nX, nY, &S.MetaCounters[i].nAggregateMax[0], buffer, nTotalHeight) + 1; + } + } + } + nX = 0; + nY = nHeight + 3 - UI.nOffsetY; + for(uint32_t i = 0; i < nNumTimers+nNumGroups+1; ++i) + { + uint32_t nY0 = nY + i * (nHeight + 1); + bool bInside = (UI.nActiveMenu == -1) && ((UI.nMouseY >= nY0) && (UI.nMouseY < (nY0 + nHeight + 1))); + MicroProfileDrawBox(nX, nY0, nTimerWidth, nY0 + (nHeight+1)+1, 0xff0000000 | (g_nMicroProfileBackColors[nColorIndex++ & 1] + ((bInside) ? 
0x002c2c2c : 0))); + } + nX += MicroProfileDrawBarLegend(nX, nY, nTotalHeight, nTimerWidth-5) + 1; + + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + if(nActiveGroup & (1ll << j)) + { + MicroProfileDrawText(nX, nY + (1+nHeight) * nLegendOffset, (uint32_t)-1, S.GroupInfo[j].pName, S.GroupInfo[j].nNameLen); + nLegendOffset += S.GroupInfo[j].nNumTimers+1; + } + } + MicroProfileDrawHeader(nX, nTimerWidth-5, "Group"); + MicroProfileDrawTextRight(nTimerWidth-3, MICROPROFILE_TEXT_HEIGHT + 2, (uint32_t)-1, "Timer", 5); + MicroProfileDrawLineVertical(nTimerWidth, 0, nTotalHeight+nY, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); + MicroProfileDrawLineHorizontal(0, nWidth, 2*MICROPROFILE_TEXT_HEIGHT + 3, UI.nOpacityBackground|g_nMicroProfileBackColors[0]|g_nMicroProfileBackColors[1]); +} + +typedef const char* (*MicroProfileSubmenuCallback)(int, bool* bSelected); +typedef void (*MicroProfileClickCallback)(int); + + +const char* MicroProfileUIMenuMode(int nIndex, bool* bSelected) +{ + MicroProfile& S = *MicroProfileGet(); + switch(nIndex) + { + case 0: + *bSelected = S.nDisplay == MP_DRAW_DETAILED; + return "Detailed"; + case 1: + *bSelected = S.nDisplay == MP_DRAW_BARS; + return "Timers"; + case 2: + *bSelected = S.nDisplay == MP_DRAW_HIDDEN; + return "Hidden"; + case 3: + *bSelected = true; + return "Off"; + case 4: + *bSelected = true; + return "------"; + case 5: + *bSelected = S.nForceEnable != 0; + return "Force Enable"; + + default: return 0; + } +} + +const char* MicroProfileUIMenuGroups(int nIndex, bool* bSelected) +{ + MicroProfile& S = *MicroProfileGet(); + *bSelected = false; + if(nIndex == 0) + { + *bSelected = S.nAllGroupsWanted != 0; + return "[ALL]"; + } + else + { + nIndex = nIndex-1; + if(nIndex < UI.GroupMenuCount) + { + MicroProfileGroupMenuItem& Item = UI.GroupMenu[nIndex]; + static char buffer[MICROPROFILE_NAME_MAX_LEN+32]; + if(Item.nIsCategory) + { + uint64_t nGroupMask = 
S.CategoryInfo[Item.nIndex].nGroupMask; + *bSelected = nGroupMask == (nGroupMask & S.nActiveGroupWanted); + snprintf(buffer, sizeof(buffer)-1, "[%s]", Item.pName); + } + else + { + *bSelected = 0 != (S.nActiveGroupWanted & (1ll << Item.nIndex)); + snprintf(buffer, sizeof(buffer)-1, " %s", Item.pName); + } + return buffer; + } + return 0; + } +} + +const char* MicroProfileUIMenuAggregate(int nIndex, bool* bSelected) +{ + MicroProfile& S = *MicroProfileGet(); + if(nIndex < sizeof(g_MicroProfileAggregatePresets)/sizeof(g_MicroProfileAggregatePresets[0])) + { + int val = g_MicroProfileAggregatePresets[nIndex]; + *bSelected = (int)S.nAggregateFlip == val; + if(0 == val) + return "Infinite"; + else + { + static char buf[128]; + snprintf(buf, sizeof(buf)-1, "%7d", val); + return buf; + } + } + return 0; + +} + +const char* MicroProfileUIMenuTimers(int nIndex, bool* bSelected) +{ + MicroProfile& S = *MicroProfileGet(); + *bSelected = 0 != (S.nBars & (1 << nIndex)); + switch(nIndex) + { + case 0: return "Time"; + case 1: return "Average"; + case 2: return "Max"; + case 3: return "Call Count"; + case 4: return "Exclusive Timers"; + case 5: return "Exclusive Average"; + case 6: return "Exclusive Max"; + } + int nMetaIndex = nIndex - 7; + if(nMetaIndex < MICROPROFILE_META_MAX) + { + return S.MetaCounters[nMetaIndex].pName; + } + return 0; +} + +const char* MicroProfileUIMenuOptions(int nIndex, bool* bSelected) +{ + MicroProfile& S = *MicroProfileGet(); + if(nIndex >= MICROPROFILE_OPTION_SIZE) return 0; + switch(UI.Options[nIndex].nSubType) + { + case 0: + *bSelected = S.fReferenceTime == g_MicroProfileReferenceTimePresets[UI.Options[nIndex].nIndex]; + break; + case 1: + *bSelected = UI.nOpacityBackground>>24 == g_MicroProfileOpacityPresets[UI.Options[nIndex].nIndex]; + break; + case 2: + *bSelected = UI.nOpacityForeground>>24 == g_MicroProfileOpacityPresets[UI.Options[nIndex].nIndex]; + break; + case 3: + *bSelected = UI.bShowSpikes; + break; +#if 
MICROPROFILE_CONTEXT_SWITCH_TRACE + case 4: + { + switch(UI.Options[nIndex].nIndex) + { + case 0: + *bSelected = S.bContextSwitchRunning; + break; + case 1: + *bSelected = S.bContextSwitchAllThreads; + break; + case 2: + *bSelected = S.bContextSwitchNoBars; + break; + } + } + break; +#endif + } + return UI.Options[nIndex].Text; +} + +const char* MicroProfileUIMenuPreset(int nIndex, bool* bSelected) +{ + static char buf[128]; + *bSelected = false; + int nNumPresets = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]); + int nIndexSave = nIndex - nNumPresets - 1; + if(nIndex == nNumPresets) + return "--"; + else if(nIndexSave >=0 && nIndexSave <nNumPresets) + { + snprintf(buf, sizeof(buf)-1, "Save '%s'", g_MicroProfilePresetNames[nIndexSave]); + return buf; + } + else if(nIndex < nNumPresets) + { + snprintf(buf, sizeof(buf)-1, "Load '%s'", g_MicroProfilePresetNames[nIndex]); + return buf; + } + else + { + return 0; + } +} + +const char* MicroProfileUIMenuCustom(int nIndex, bool* bSelected) +{ + if((uint32_t)-1 == UI.nCustomActive) + { + *bSelected = nIndex == 0; + } + else + { + *bSelected = nIndex-2 == UI.nCustomActive; + } + switch(nIndex) + { + case 0: return "Disable"; + case 1: return "--"; + default: + nIndex -= 2; + if(nIndex < UI.nCustomCount) + { + return UI.Custom[nIndex].pName; + } + else + { + return 0; + } + } +} + +const char* MicroProfileUIMenuEmpty(int nIndex, bool* bSelected) +{ + return 0; +} + + +void MicroProfileUIClickMode(int nIndex) +{ + MicroProfile& S = *MicroProfileGet(); + switch(nIndex) + { + case 0: + S.nDisplay = MP_DRAW_DETAILED; + break; + case 1: + S.nDisplay = MP_DRAW_BARS; + break; + case 2: + S.nDisplay = MP_DRAW_HIDDEN; + break; + case 3: + S.nDisplay = 0; + break; + case 4: + break; + case 5: + S.nForceEnable = !S.nForceEnable; + break; + } +} + +void MicroProfileUIClickGroups(int nIndex) +{ + MicroProfile& S = *MicroProfileGet(); + if(nIndex == 0) + S.nAllGroupsWanted = 1-S.nAllGroupsWanted; + else + { + 
nIndex -= 1; + if(nIndex < UI.GroupMenuCount) + { + MicroProfileGroupMenuItem& Item = UI.GroupMenu[nIndex]; + if(Item.nIsCategory) + { + uint64_t nGroupMask = S.CategoryInfo[Item.nIndex].nGroupMask; + if(nGroupMask != (nGroupMask & S.nActiveGroupWanted)) + { + S.nActiveGroupWanted |= nGroupMask; + } + else + { + S.nActiveGroupWanted &= ~nGroupMask; + } + } + else + { + MP_ASSERT(Item.nIndex < S.nGroupCount); + S.nActiveGroupWanted ^= (1ll << Item.nIndex); + } + } + } +} + +void MicroProfileUIClickAggregate(int nIndex) +{ + MicroProfile& S = *MicroProfileGet(); + S.nAggregateFlip = g_MicroProfileAggregatePresets[nIndex]; + if(0 == S.nAggregateFlip) + { + S.nAggregateClear = 1; + } +} + +void MicroProfileUIClickTimers(int nIndex) +{ + MicroProfile& S = *MicroProfileGet(); + S.nBars ^= (1 << nIndex); +} + +void MicroProfileUIClickOptions(int nIndex) +{ + MicroProfile& S = *MicroProfileGet(); + switch(UI.Options[nIndex].nSubType) + { + case 0: + S.fReferenceTime = g_MicroProfileReferenceTimePresets[UI.Options[nIndex].nIndex]; + S.fRcpReferenceTime = 1.f / S.fReferenceTime; + break; + case 1: + UI.nOpacityBackground = g_MicroProfileOpacityPresets[UI.Options[nIndex].nIndex]<<24; + break; + case 2: + UI.nOpacityForeground = g_MicroProfileOpacityPresets[UI.Options[nIndex].nIndex]<<24; + break; + case 3: + UI.bShowSpikes = !UI.bShowSpikes; + break; +#if MICROPROFILE_CONTEXT_SWITCH_TRACE + case 4: + { + switch(UI.Options[nIndex].nIndex) + { + case 0: + if(S.bContextSwitchRunning) + { + MicroProfileStopContextSwitchTrace(); + } + else + { + MicroProfileStartContextSwitchTrace(); + } + break; + case 1: + S.bContextSwitchAllThreads = !S.bContextSwitchAllThreads; + break; + case 2: + S.bContextSwitchNoBars= !S.bContextSwitchNoBars; + break; + + } + } + break; +#endif + } +} + +void MicroProfileUIClickPreset(int nIndex) +{ + int nNumPresets = sizeof(g_MicroProfilePresetNames) / sizeof(g_MicroProfilePresetNames[0]); + int nIndexSave = nIndex - nNumPresets - 1; + if(nIndexSave >= 0 
&& nIndexSave < nNumPresets) + { + MicroProfileSavePreset(g_MicroProfilePresetNames[nIndexSave]); + } + else if(nIndex >= 0 && nIndex < nNumPresets) + { + MicroProfileLoadPreset(g_MicroProfilePresetNames[nIndex]); + } +} + +void MicroProfileUIClickCustom(int nIndex) +{ + if(nIndex == 0) + { + MicroProfileCustomGroupDisable(); + } + else + { + MicroProfileCustomGroupEnable(nIndex-2); + } + +} + +void MicroProfileUIClickEmpty(int nIndex) +{ + +} + + +void MicroProfileDrawMenu(uint32_t nWidth, uint32_t nHeight) +{ + MicroProfile& S = *MicroProfileGet(); + + uint32_t nX = 0; + uint32_t nY = 0; + bool bMouseOver = UI.nMouseY < MICROPROFILE_TEXT_HEIGHT + 1; +#define SBUF_SIZE 256 + char buffer[256]; + MicroProfileDrawBox(nX, nY, nX + nWidth, nY + (MICROPROFILE_TEXT_HEIGHT+1)+1, 0xff000000|g_nMicroProfileBackColors[1]); + +#define MICROPROFILE_MENU_MAX 16 + const char* pMenuText[MICROPROFILE_MENU_MAX] = {0}; + uint32_t nMenuX[MICROPROFILE_MENU_MAX] = {0}; + uint32_t nNumMenuItems = 0; + + int nLen = snprintf(buffer, 127, "MicroProfile"); + MicroProfileDrawText(nX, nY, (uint32_t)-1, buffer, nLen); + nX += (sizeof("MicroProfile")+2) * (MICROPROFILE_TEXT_WIDTH+1); + pMenuText[nNumMenuItems++] = "Mode"; + pMenuText[nNumMenuItems++] = "Groups"; + char AggregateText[64]; + snprintf(AggregateText, sizeof(AggregateText)-1, "Aggregate[%d]", S.nAggregateFlip ? S.nAggregateFlip : S.nAggregateFlipCount); + pMenuText[nNumMenuItems++] = &AggregateText[0]; + pMenuText[nNumMenuItems++] = "Timers"; + pMenuText[nNumMenuItems++] = "Options"; + pMenuText[nNumMenuItems++] = "Preset"; + pMenuText[nNumMenuItems++] = "Custom"; + const int nPauseIndex = nNumMenuItems; + pMenuText[nNumMenuItems++] = S.nRunning ? 
"Pause" : "Unpause"; + pMenuText[nNumMenuItems++] = "Help"; + + if(S.nOverflow) + { + pMenuText[nNumMenuItems++] = "!BUFFERSFULL!"; + } + + + if(UI.GroupMenuCount != S.nGroupCount + S.nCategoryCount) + { + UI.GroupMenuCount = S.nGroupCount + S.nCategoryCount; + for(uint32_t i = 0; i < S.nCategoryCount; ++i) + { + UI.GroupMenu[i].nIsCategory = 1; + UI.GroupMenu[i].nCategoryIndex = i; + UI.GroupMenu[i].nIndex = i; + UI.GroupMenu[i].pName = S.CategoryInfo[i].pName; + } + for(uint32_t i = 0; i < S.nGroupCount; ++i) + { + uint32_t idx = i + S.nCategoryCount; + UI.GroupMenu[idx].nIsCategory = 0; + UI.GroupMenu[idx].nCategoryIndex = S.GroupInfo[i].nCategory; + UI.GroupMenu[idx].nIndex = i; + UI.GroupMenu[idx].pName = S.GroupInfo[i].pName; + } + std::sort(&UI.GroupMenu[0], &UI.GroupMenu[UI.GroupMenuCount], + [] (const MicroProfileGroupMenuItem& l, const MicroProfileGroupMenuItem& r) -> bool + { + if(l.nCategoryIndex < r.nCategoryIndex) + { + return true; + } + else if(r.nCategoryIndex < l.nCategoryIndex) + { + return false; + } + if(r.nIsCategory || l.nIsCategory) + { + return l.nIsCategory > r.nIsCategory; + } + return MP_STRCASECMP(l.pName, r.pName)<0; + } + ); + } + + MicroProfileSubmenuCallback GroupCallback[MICROPROFILE_MENU_MAX] = + { + MicroProfileUIMenuMode, + MicroProfileUIMenuGroups, + MicroProfileUIMenuAggregate, + MicroProfileUIMenuTimers, + MicroProfileUIMenuOptions, + MicroProfileUIMenuPreset, + MicroProfileUIMenuCustom, + MicroProfileUIMenuEmpty, + MicroProfileUIMenuEmpty, + MicroProfileUIMenuEmpty, + }; + + MicroProfileClickCallback CBClick[MICROPROFILE_MENU_MAX] = + { + MicroProfileUIClickMode, + MicroProfileUIClickGroups, + MicroProfileUIClickAggregate, + MicroProfileUIClickTimers, + MicroProfileUIClickOptions, + MicroProfileUIClickPreset, + MicroProfileUIClickCustom, + MicroProfileUIClickEmpty, + MicroProfileUIClickEmpty, + MicroProfileUIClickEmpty, + }; + + + uint32_t nSelectMenu = (uint32_t)-1; + for(uint32_t i = 0; i < nNumMenuItems; ++i) + { + 
nMenuX[i] = nX; + uint32_t nLen = (uint32_t)strlen(pMenuText[i]); + uint32_t nEnd = nX + nLen * (MICROPROFILE_TEXT_WIDTH+1); + if(UI.nMouseY <= MICROPROFILE_TEXT_HEIGHT && UI.nMouseX <= nEnd && UI.nMouseX >= nX) + { + MicroProfileDrawBox(nX-1, nY, nX + nLen * (MICROPROFILE_TEXT_WIDTH+1), nY +(MICROPROFILE_TEXT_HEIGHT+1)+1, 0xff888888); + nSelectMenu = i; + if((UI.nMouseLeft || UI.nMouseRight) && i == (int)nPauseIndex) + { + S.nToggleRunning = 1; + } + } + MicroProfileDrawText(nX, nY, (uint32_t)-1, pMenuText[i], (uint32_t)strlen(pMenuText[i])); + nX += (nLen+1) * (MICROPROFILE_TEXT_WIDTH+1); + } + uint32_t nMenu = nSelectMenu != (uint32_t)-1 ? nSelectMenu : UI.nActiveMenu; + UI.nActiveMenu = nMenu; + if((uint32_t)-1 != nMenu) + { + nX = nMenuX[nMenu]; + nY += MICROPROFILE_TEXT_HEIGHT+1; + MicroProfileSubmenuCallback CB = GroupCallback[nMenu]; + int nNumLines = 0; + bool bSelected = false; + const char* pString = CB(nNumLines, &bSelected); + uint32_t nWidth = 0, nHeight = 0; + while(pString) + { + nWidth = MicroProfileMax<int>(nWidth, (int)strlen(pString)); + nNumLines++; + pString = CB(nNumLines, &bSelected); + } + nWidth = (2+nWidth) * (MICROPROFILE_TEXT_WIDTH+1); + nHeight = nNumLines * (MICROPROFILE_TEXT_HEIGHT+1); + if(UI.nMouseY <= nY + nHeight+0 && UI.nMouseY >= nY-0 && UI.nMouseX <= nX + nWidth + 0 && UI.nMouseX >= nX - 0) + { + UI.nActiveMenu = nMenu; + } + else if(nSelectMenu == (uint32_t)-1) + { + UI.nActiveMenu = (uint32_t)-1; + } + MicroProfileDrawBox(nX, nY, nX + nWidth, nY + nHeight, 0xff000000|g_nMicroProfileBackColors[1]); + for(int i = 0; i < nNumLines; ++i) + { + bool bSelected = false; + const char* pString = CB(i, &bSelected); + if(UI.nMouseY >= nY && UI.nMouseY < nY + MICROPROFILE_TEXT_HEIGHT + 1) + { + bMouseOver = true; + if(UI.nMouseLeft || UI.nMouseRight) + { + CBClick[nMenu](i); + } + MicroProfileDrawBox(nX, nY, nX + nWidth, nY + MICROPROFILE_TEXT_HEIGHT + 1, 0xff888888); + } + int nLen = snprintf(buffer, SBUF_SIZE-1, "%c %s", bSelected ? 
'*' : ' ' ,pString); + MicroProfileDrawText(nX, nY, (uint32_t)-1, buffer, nLen); + nY += MICROPROFILE_TEXT_HEIGHT+1; + } + } + + + { + static char FrameTimeMessage[64]; + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + float fMs = fToMs * (S.nFlipTicks); + float fAverageMs = fToMs * (S.nFlipAggregateDisplay / nAggregateFrames); + float fMaxMs = fToMs * S.nFlipMaxDisplay; + int nLen = snprintf(FrameTimeMessage, sizeof(FrameTimeMessage)-1, "Time[%6.2f] Avg[%6.2f] Max[%6.2f]", fMs, fAverageMs, fMaxMs); + pMenuText[nNumMenuItems++] = &FrameTimeMessage[0]; + MicroProfileDrawText(nWidth - nLen * (MICROPROFILE_TEXT_WIDTH+1), 0, -1, FrameTimeMessage, nLen); + } +} + + +void MicroProfileMoveGraph() +{ + + int nZoom = UI.nMouseWheelDelta; + int nPanX = 0; + int nPanY = 0; + static int X = 0, Y = 0; + if(UI.nMouseDownLeft && !UI.nModDown) + { + nPanX = UI.nMouseX - X; + nPanY = UI.nMouseY - Y; + } + X = UI.nMouseX; + Y = UI.nMouseY; + + if(nZoom) + { + float fOldRange = UI.fDetailedRange; + if(nZoom>0) + { + UI.fDetailedRangeTarget = UI.fDetailedRange *= UI.nModDown ? 1.40f : 1.05f; + } + else + { + float fNewDetailedRange = UI.fDetailedRange / (UI.nModDown ? 
1.40f : 1.05f); + if(fNewDetailedRange < 1e-4f) //100ns + fNewDetailedRange = 1e-4f; + UI.fDetailedRangeTarget = UI.fDetailedRange = fNewDetailedRange; + } + + float fDiff = fOldRange - UI.fDetailedRange; + float fMousePrc = MicroProfileMax((float)UI.nMouseX / UI.nWidth ,0.f); + UI.fDetailedOffsetTarget = UI.fDetailedOffset += fDiff * fMousePrc; + + } + if(nPanX) + { + UI.fDetailedOffsetTarget = UI.fDetailedOffset += -nPanX * UI.fDetailedRange / UI.nWidth; + } + UI.nOffsetY -= nPanY; + UI.nOffsetX += nPanX; + if(UI.nOffsetX > 0) + UI.nOffsetX = 0; + if(UI.nOffsetY<0) + UI.nOffsetY = 0; +} + +void MicroProfileDrawCustom(uint32_t nWidth, uint32_t nHeight) +{ + if((uint32_t)-1 != UI.nCustomActive) + { + MicroProfile& S = *MicroProfileGet(); + MP_ASSERT(UI.nCustomActive < MICROPROFILE_CUSTOM_MAX); + MicroProfileCustom* pCustom = &UI.Custom[UI.nCustomActive]; + uint32_t nCount = pCustom->nNumTimers; + uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1; + uint32_t nExtraOffset = 1 + ((pCustom->nFlags & MICROPROFILE_CUSTOM_STACK) != 0 ? 
3 : 0); + uint32_t nOffsetYBase = nHeight - (nExtraOffset+nCount)* (1+MICROPROFILE_TEXT_HEIGHT) - MICROPROFILE_CUSTOM_PADDING; + uint32_t nOffsetY = nOffsetYBase; + float fReference = pCustom->fReference; + float fRcpReference = 1.f / fReference; + uint32_t nReducedWidth = UI.nWidth - 2*MICROPROFILE_CUSTOM_PADDING - MICROPROFILE_GRAPH_WIDTH; + + char Buffer[MICROPROFILE_NAME_MAX_LEN*2+1]; + float* pTime = (float*)alloca(sizeof(float)*nCount); + float* pTimeAvg = (float*)alloca(sizeof(float)*nCount); + float* pTimeMax = (float*)alloca(sizeof(float)*nCount); + uint32_t* pColors = (uint32_t*)alloca(sizeof(uint32_t)*nCount); + uint32_t nMaxOffsetX = 0; + MicroProfileDrawBox(MICROPROFILE_CUSTOM_PADDING-1, nOffsetY-1, MICROPROFILE_CUSTOM_PADDING+nReducedWidth+1, UI.nHeight - MICROPROFILE_CUSTOM_PADDING+1, 0x88000000|g_nMicroProfileBackColors[0]); + + for(uint32_t i = 0; i < nCount; ++i) + { + uint16_t nTimerIndex = MicroProfileGetTimerIndex(pCustom->pTimers[i]); + uint16_t nGroupIndex = MicroProfileGetGroupIndex(pCustom->pTimers[i]); + float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? 
MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu()); + pTime[i] = S.Frame[nTimerIndex].nTicks * fToMs; + pTimeAvg[i] = fToMs * (S.Aggregate[nTimerIndex].nTicks / nAggregateFrames); + pTimeMax[i] = fToMs * (S.AggregateMax[nTimerIndex]); + pColors[i] = S.TimerInfo[nTimerIndex].nColor; + } + + MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING + 3*MICROPROFILE_TEXT_WIDTH, nOffsetY, (uint32_t)-1, "Avg", sizeof("Avg")-1); + MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING + 13*MICROPROFILE_TEXT_WIDTH, nOffsetY, (uint32_t)-1, "Max", sizeof("Max")-1); + for(uint32_t i = 0; i < nCount; ++i) + { + nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT); + uint16_t nTimerIndex = MicroProfileGetTimerIndex(pCustom->pTimers[i]); + uint16_t nGroupIndex = MicroProfileGetGroupIndex(pCustom->pTimers[i]); + MicroProfileTimerInfo* pTimerInfo = &S.TimerInfo[nTimerIndex]; + int nSize; + uint32_t nOffsetX = MICROPROFILE_CUSTOM_PADDING; + nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2f", pTimeAvg[i]); + MicroProfileDrawText(nOffsetX, nOffsetY, (uint32_t)-1, Buffer, nSize); + nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1); + nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2f", pTimeMax[i]); + MicroProfileDrawText(nOffsetX, nOffsetY, (uint32_t)-1, Buffer, nSize); + nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1); + nSize = snprintf(Buffer, sizeof(Buffer)-1, "%s:%s", S.GroupInfo[nGroupIndex].pName, pTimerInfo->pName); + MicroProfileDrawText(nOffsetX, nOffsetY, pTimerInfo->nColor, Buffer, nSize); + nOffsetX += (nSize+2) * (MICROPROFILE_TEXT_WIDTH+1); + nMaxOffsetX = MicroProfileMax(nMaxOffsetX, nOffsetX); + } + uint32_t nMaxWidth = nReducedWidth- nMaxOffsetX; + + if(pCustom->nFlags & MICROPROFILE_CUSTOM_BARS) + { + nOffsetY = nOffsetYBase; + float* pMs = pCustom->nFlags & MICROPROFILE_CUSTOM_BAR_SOURCE_MAX ? pTimeMax : pTimeAvg; + const char* pString = pCustom->nFlags & MICROPROFILE_CUSTOM_BAR_SOURCE_MAX ? 
"Max" : "Avg"; + MicroProfileDrawText(nMaxOffsetX, nOffsetY, (uint32_t)-1, pString, strlen(pString)); + int nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2fms", fReference); + MicroProfileDrawText(nReducedWidth - (1+nSize) * (MICROPROFILE_TEXT_WIDTH+1), nOffsetY, (uint32_t)-1, Buffer, nSize); + for(uint32_t i = 0; i < nCount; ++i) + { + nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT); + uint32_t nWidth = MicroProfileMin(nMaxWidth, (uint32_t)(nMaxWidth * pMs[i] * fRcpReference)); + MicroProfileDrawBox(nMaxOffsetX, nOffsetY, nMaxOffsetX+nWidth, nOffsetY+MICROPROFILE_TEXT_HEIGHT, pColors[i]|0xff000000); + } + } + if(pCustom->nFlags & MICROPROFILE_CUSTOM_STACK) + { + nOffsetY += 2*(1+MICROPROFILE_TEXT_HEIGHT); + const char* pString = pCustom->nFlags & MICROPROFILE_CUSTOM_STACK_SOURCE_MAX ? "Max" : "Avg"; + MicroProfileDrawText(MICROPROFILE_CUSTOM_PADDING, nOffsetY, (uint32_t)-1, pString, strlen(pString)); + int nSize = snprintf(Buffer, sizeof(Buffer)-1, "%6.2fms", fReference); + MicroProfileDrawText(nReducedWidth - (1+nSize) * (MICROPROFILE_TEXT_WIDTH+1), nOffsetY, (uint32_t)-1, Buffer, nSize); + nOffsetY += (1+MICROPROFILE_TEXT_HEIGHT); + float fPosX = MICROPROFILE_CUSTOM_PADDING; + float* pMs = pCustom->nFlags & MICROPROFILE_CUSTOM_STACK_SOURCE_MAX ? 
pTimeMax : pTimeAvg; + for(uint32_t i = 0; i < nCount; ++i) + { + float fWidth = pMs[i] * fRcpReference * nReducedWidth; + uint32_t nX = fPosX; + fPosX += fWidth; + uint32_t nXEnd = fPosX; + if(nX < nXEnd) + { + MicroProfileDrawBox(nX, nOffsetY, nXEnd, nOffsetY+MICROPROFILE_TEXT_HEIGHT, pColors[i]|0xff000000); + } + } + } + } +} +void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight) +{ + MICROPROFILE_SCOPE(g_MicroProfileDraw); + MicroProfile& S = *MicroProfileGet(); + + { + static int once = 0; + if(0 == once) + { + std::recursive_mutex& m = MicroProfileGetMutex(); + m.lock(); + MicroProfileInitUI(); + + + + uint32_t nDisplay = S.nDisplay; + MicroProfileLoadPreset(MICROPROFILE_DEFAULT_PRESET); + once++; + S.nDisplay = nDisplay;// dont load display, just state + m.unlock(); + + } + } + + + if(S.nDisplay) + { + std::recursive_mutex& m = MicroProfileGetMutex(); + m.lock(); + UI.nWidth = nWidth; + UI.nHeight = nHeight; + UI.nHoverToken = MICROPROFILE_INVALID_TOKEN; + UI.nHoverTime = 0; + UI.nHoverFrame = -1; + if(S.nDisplay != MP_DRAW_DETAILED) + S.nContextSwitchHoverThread = S.nContextSwitchHoverThreadAfter = S.nContextSwitchHoverThreadBefore = -1; + MicroProfileMoveGraph(); + + + if(S.nDisplay == MP_DRAW_DETAILED) + { + MicroProfileDrawDetailedView(nWidth, nHeight); + } + else if(S.nDisplay == MP_DRAW_BARS && S.nBars) + { + MicroProfileDrawBarView(nWidth, nHeight); + } + + MicroProfileDrawMenu(nWidth, nHeight); + bool bMouseOverGraph = MicroProfileDrawGraph(nWidth, nHeight); + MicroProfileDrawCustom(nWidth, nHeight); + bool bHidden = S.nDisplay == MP_DRAW_HIDDEN; + if(!bHidden) + { + uint32_t nLockedToolTipX = 3; + bool bDeleted = false; + for(int i = 0; i < MICROPROFILE_TOOLTIP_MAX_LOCKED; ++i) + { + int nIndex = (g_MicroProfileUI.LockedToolTipFront + i) % MICROPROFILE_TOOLTIP_MAX_LOCKED; + if(g_MicroProfileUI.LockedToolTips[nIndex].ppStrings[0]) + { + uint32_t nToolTipWidth = 0, nToolTipHeight = 0; + 
MicroProfileFloatWindowSize(g_MicroProfileUI.LockedToolTips[nIndex].ppStrings, g_MicroProfileUI.LockedToolTips[nIndex].nNumStrings, 0, nToolTipWidth, nToolTipHeight, 0); + uint32_t nStartY = nHeight - nToolTipHeight - 2; + if(!bDeleted && UI.nMouseY > nStartY && UI.nMouseX > nLockedToolTipX && UI.nMouseX <= nLockedToolTipX + nToolTipWidth && (UI.nMouseLeft || UI.nMouseRight) ) + { + bDeleted = true; + int j = i; + for(; j < MICROPROFILE_TOOLTIP_MAX_LOCKED-1; ++j) + { + int nIndex0 = (g_MicroProfileUI.LockedToolTipFront + j) % MICROPROFILE_TOOLTIP_MAX_LOCKED; + int nIndex1 = (g_MicroProfileUI.LockedToolTipFront + j+1) % MICROPROFILE_TOOLTIP_MAX_LOCKED; + MicroProfileStringArrayCopy(&g_MicroProfileUI.LockedToolTips[nIndex0], &g_MicroProfileUI.LockedToolTips[nIndex1]); + } + MicroProfileStringArrayClear(&g_MicroProfileUI.LockedToolTips[(g_MicroProfileUI.LockedToolTipFront + j) % MICROPROFILE_TOOLTIP_MAX_LOCKED]); + } + else + { + MicroProfileDrawFloatWindow(nLockedToolTipX, nHeight-nToolTipHeight-2, &g_MicroProfileUI.LockedToolTips[nIndex].ppStrings[0], g_MicroProfileUI.LockedToolTips[nIndex].nNumStrings, g_MicroProfileUI.nLockedToolTipColor[nIndex]); + nLockedToolTipX += nToolTipWidth + 4; + } + } + } + + if(UI.nActiveMenu == 8) + { + if(S.nDisplay & MP_DRAW_DETAILED) + { + MicroProfileStringArray DetailedHelp; + MicroProfileStringArrayClear(&DetailedHelp); + MicroProfileStringArrayFormat(&DetailedHelp, "%s", MICROPROFILE_HELP_LEFT); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Toggle Graph"); + MicroProfileStringArrayFormat(&DetailedHelp, "%s", MICROPROFILE_HELP_ALT); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Zoom"); + MicroProfileStringArrayFormat(&DetailedHelp, "%s + %s", MICROPROFILE_HELP_MOD, MICROPROFILE_HELP_LEFT); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Lock Tooltip"); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Drag"); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Pan View"); + 
MicroProfileStringArrayAddLiteral(&DetailedHelp, "Mouse Wheel"); + MicroProfileStringArrayAddLiteral(&DetailedHelp, "Zoom"); + MicroProfileDrawFloatWindow(nWidth, MICROPROFILE_FRAME_HISTORY_HEIGHT+20, DetailedHelp.ppStrings, DetailedHelp.nNumStrings, 0xff777777); + + MicroProfileStringArray DetailedHistoryHelp; + MicroProfileStringArrayClear(&DetailedHistoryHelp); + MicroProfileStringArrayFormat(&DetailedHistoryHelp, "%s", MICROPROFILE_HELP_LEFT); + MicroProfileStringArrayAddLiteral(&DetailedHistoryHelp, "Center View"); + MicroProfileStringArrayFormat(&DetailedHistoryHelp, "%s", MICROPROFILE_HELP_ALT); + MicroProfileStringArrayAddLiteral(&DetailedHistoryHelp, "Zoom to frame"); + MicroProfileDrawFloatWindow(nWidth, 20, DetailedHistoryHelp.ppStrings, DetailedHistoryHelp.nNumStrings, 0xff777777); + + + + } + else if(0 != (S.nDisplay & MP_DRAW_BARS) && S.nBars) + { + MicroProfileStringArray BarHelp; + MicroProfileStringArrayClear(&BarHelp); + MicroProfileStringArrayFormat(&BarHelp, "%s", MICROPROFILE_HELP_LEFT); + MicroProfileStringArrayAddLiteral(&BarHelp, "Toggle Graph"); + MicroProfileStringArrayFormat(&BarHelp, "%s + %s", MICROPROFILE_HELP_MOD, MICROPROFILE_HELP_LEFT); + MicroProfileStringArrayAddLiteral(&BarHelp, "Lock Tooltip"); + MicroProfileStringArrayAddLiteral(&BarHelp, "Drag"); + MicroProfileStringArrayAddLiteral(&BarHelp, "Pan View"); + MicroProfileDrawFloatWindow(nWidth, MICROPROFILE_FRAME_HISTORY_HEIGHT+20, BarHelp.ppStrings, BarHelp.nNumStrings, 0xff777777); + + } + MicroProfileStringArray Debug; + MicroProfileStringArrayClear(&Debug); + MicroProfileStringArrayAddLiteral(&Debug, "Memory Usage"); + MicroProfileStringArrayFormat(&Debug, "%4.2fmb", S.nMemUsage / (1024.f * 1024.f)); + MicroProfileStringArrayAddLiteral(&Debug, "Web Server Port"); + MicroProfileStringArrayFormat(&Debug, "%d", MicroProfileWebServerPort()); + uint32_t nFrameNext = (S.nFrameCurrent+1) % MICROPROFILE_MAX_FRAME_HISTORY; + MicroProfileFrameState* pFrameCurrent = 
&S.Frames[S.nFrameCurrent]; + MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext]; + + + MicroProfileStringArrayAddLiteral(&Debug, ""); + MicroProfileStringArrayAddLiteral(&Debug, ""); + MicroProfileStringArrayAddLiteral(&Debug, "Usage"); + MicroProfileStringArrayAddLiteral(&Debug, "markers [frames] "); + +#if MICROPROFILE_CONTEXT_SWITCH_TRACE + MicroProfileStringArrayAddLiteral(&Debug, "Context Switch"); + MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage ); +#endif + + for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + if(pFrameCurrent->nLogStart[i] && S.Pool[i]) + { + uint32_t nEnd = pFrameNext->nLogStart[i]; + uint32_t nStart = pFrameCurrent->nLogStart[i]; + uint32_t nUsage = nStart < nEnd ? (nEnd - nStart) : (nEnd + MICROPROFILE_BUFFER_SIZE - nStart); + uint32_t nFrameSupport = MICROPROFILE_BUFFER_SIZE / nUsage; + MicroProfileStringArrayFormat(&Debug, "%s", &S.Pool[i]->ThreadName[0]); + MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", nUsage, nFrameSupport); + } + } + + MicroProfileDrawFloatWindow(0, nHeight-10, Debug.ppStrings, Debug.nNumStrings, 0xff777777); + } + + + + if(UI.nActiveMenu == -1 && !bMouseOverGraph) + { + if(UI.nHoverToken != MICROPROFILE_INVALID_TOKEN) + { + MicroProfileDrawFloatTooltip(UI.nMouseX, UI.nMouseY, UI.nHoverToken, UI.nHoverTime); + } + else if(S.nContextSwitchHoverThreadAfter != -1 && S.nContextSwitchHoverThreadBefore != -1) + { + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + MicroProfileStringArray ToolTip; + MicroProfileStringArrayClear(&ToolTip); + MicroProfileStringArrayAddLiteral(&ToolTip, "Context Switch"); + MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThread); + MicroProfileStringArrayAddLiteral(&ToolTip, "Before"); + MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThreadBefore); + MicroProfileStringArrayAddLiteral(&ToolTip, "After"); + 
MicroProfileStringArrayFormat(&ToolTip, "%04x", S.nContextSwitchHoverThreadAfter); + MicroProfileStringArrayAddLiteral(&ToolTip, "Duration"); + int64_t nDifference = MicroProfileLogTickDifference(S.nContextSwitchHoverTickIn, S.nContextSwitchHoverTickOut); + MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fToMs * nDifference ); + MicroProfileStringArrayAddLiteral(&ToolTip, "CPU"); + MicroProfileStringArrayFormat(&ToolTip, "%d", S.nContextSwitchHoverCpu); + MicroProfileDrawFloatWindow(UI.nMouseX, UI.nMouseY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, -1); + + + } + else if(UI.nHoverFrame != -1) + { + uint32_t nNextFrame = (UI.nHoverFrame+1)%MICROPROFILE_MAX_FRAME_HISTORY; + int64_t nTick = S.Frames[UI.nHoverFrame].nFrameStartCpu; + int64_t nTickNext = S.Frames[nNextFrame].nFrameStartCpu; + int64_t nTickGpu = S.Frames[UI.nHoverFrame].nFrameStartGpu; + int64_t nTickNextGpu = S.Frames[nNextFrame].nFrameStartGpu; + + float fToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()); + float fToMsGpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu()); + float fMs = fToMs * (nTickNext - nTick); + float fMsGpu = fToMsGpu * (nTickNextGpu - nTickGpu); + MicroProfileStringArray ToolTip; + MicroProfileStringArrayClear(&ToolTip); + MicroProfileStringArrayFormat(&ToolTip, "Frame %d", UI.nHoverFrame); + #if MICROPROFILE_DEBUG + MicroProfileStringArrayFormat(&ToolTip, "%p", &S.Frames[UI.nHoverFrame]); + #else + MicroProfileStringArrayAddLiteral(&ToolTip, ""); + #endif + MicroProfileStringArrayAddLiteral(&ToolTip, "CPU Time"); + MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fMs); + MicroProfileStringArrayAddLiteral(&ToolTip, "GPU Time"); + MicroProfileStringArrayFormat(&ToolTip, "%6.2fms", fMsGpu); + #if MICROPROFILE_DEBUG + for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + if(S.Frames[UI.nHoverFrame].nLogStart[i]) + { + MicroProfileStringArrayFormat(&ToolTip, "%d", i); + MicroProfileStringArrayFormat(&ToolTip, "%d", 
S.Frames[UI.nHoverFrame].nLogStart[i]); + } + } + #endif + MicroProfileDrawFloatWindow(UI.nMouseX, UI.nMouseY+20, &ToolTip.ppStrings[0], ToolTip.nNumStrings, -1); + } + if(UI.nMouseLeft) + { + if(UI.nHoverToken != MICROPROFILE_INVALID_TOKEN) + MicroProfileToggleGraph(UI.nHoverToken); + } + } + } + +#if MICROPROFILE_DRAWCURSOR + { + float fCursor[8] = + { + MicroProfileMax(0, (int)UI.nMouseX-3), UI.nMouseY, + MicroProfileMin(nWidth, UI.nMouseX+3), UI.nMouseY, + UI.nMouseX, MicroProfileMax((int)UI.nMouseY-3, 0), + UI.nMouseX, MicroProfileMin(nHeight, UI.nMouseY+3), + }; + MicroProfileDrawLine2D(2, &fCursor[0], 0xff00ff00); + MicroProfileDrawLine2D(2, &fCursor[4], 0xff00ff00); + } +#endif + m.unlock(); + } + else if(UI.nCustomActive != (uint32_t)-1) + { + std::recursive_mutex& m = MicroProfileGetMutex(); + m.lock(); + MicroProfileDrawGraph(nWidth, nHeight); + MicroProfileDrawCustom(nWidth, nHeight); + m.unlock(); + + } + UI.nMouseLeft = UI.nMouseRight = 0; + UI.nMouseLeftMod = UI.nMouseRightMod = 0; + UI.nMouseWheelDelta = 0; + if(S.nOverflow) + S.nOverflow--; + + UI.fDetailedOffset = UI.fDetailedOffset + (UI.fDetailedOffsetTarget - UI.fDetailedOffset) * MICROPROFILE_ANIM_DELAY_PRC; + UI.fDetailedRange = UI.fDetailedRange + (UI.fDetailedRangeTarget - UI.fDetailedRange) * MICROPROFILE_ANIM_DELAY_PRC; + + +} + +bool MicroProfileIsDrawing() +{ + MicroProfile& S = *MicroProfileGet(); + return S.nDisplay != 0; +} + +void MicroProfileToggleGraph(MicroProfileToken nToken) +{ + MicroProfile& S = *MicroProfileGet(); + uint32_t nTimerId = MicroProfileGetTimerIndex(nToken); + nToken &= 0xffff; + int32_t nMinSort = 0x7fffffff; + int32_t nFreeIndex = -1; + int32_t nMinIndex = 0; + int32_t nMaxSort = 0x80000000; + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken == MICROPROFILE_INVALID_TOKEN) + nFreeIndex = i; + if(S.Graph[i].nToken == nToken) + { + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + S.TimerInfo[nTimerId].bGraph = false; + return; + } 
+ if(S.Graph[i].nKey < nMinSort) + { + nMinSort = S.Graph[i].nKey; + nMinIndex = i; + } + if(S.Graph[i].nKey > nMaxSort) + { + nMaxSort = S.Graph[i].nKey; + } + } + int nIndex = nFreeIndex > -1 ? nFreeIndex : nMinIndex; + if (nFreeIndex == -1) + { + uint32_t idx = MicroProfileGetTimerIndex(S.Graph[nIndex].nToken); + S.TimerInfo[idx].bGraph = false; + } + S.Graph[nIndex].nToken = nToken; + S.Graph[nIndex].nKey = nMaxSort+1; + memset(&S.Graph[nIndex].nHistory[0], 0, sizeof(S.Graph[nIndex].nHistory)); + S.TimerInfo[nTimerId].bGraph = true; +} + + +void MicroProfileMousePosition(uint32_t nX, uint32_t nY, int nWheelDelta) +{ + UI.nMouseX = nX; + UI.nMouseY = nY; + UI.nMouseWheelDelta = nWheelDelta; +} + +void MicroProfileModKey(uint32_t nKeyState) +{ + UI.nModDown = nKeyState ? 1 : 0; +} + +void MicroProfileClearGraph() +{ + MicroProfile& S = *MicroProfileGet(); + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken != 0) + { + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + } + } +} + +void MicroProfileMouseButton(uint32_t nLeft, uint32_t nRight) +{ + bool bCanRelease = abs((int)(UI.nMouseDownX - UI.nMouseX)) + abs((int)(UI.nMouseDownY - UI.nMouseY)) < 3; + + if(0 == nLeft && UI.nMouseDownLeft && bCanRelease) + { + if(UI.nModDown) + UI.nMouseLeftMod = 1; + else + UI.nMouseLeft = 1; + } + + if(0 == nRight && UI.nMouseDownRight && bCanRelease) + { + if(UI.nModDown) + UI.nMouseRightMod = 1; + else + UI.nMouseRight = 1; + } + if((nLeft || nRight) && !(UI.nMouseDownLeft || UI.nMouseDownRight)) + { + UI.nMouseDownX = UI.nMouseX; + UI.nMouseDownY = UI.nMouseY; + } + + UI.nMouseDownLeft = nLeft; + UI.nMouseDownRight = nRight; + +} + +void MicroProfileDrawLineVertical(int nX, int nTop, int nBottom, uint32_t nColor) +{ + MicroProfileDrawBox(nX, nTop, nX + 1, nBottom, nColor); +} + +void MicroProfileDrawLineHorizontal(int nLeft, int nRight, int nY, uint32_t nColor) +{ + MicroProfileDrawBox(nLeft, nY, nRight, nY + 1, nColor); +} + + + +#include 
<stdio.h> + +#define MICROPROFILE_PRESET_HEADER_MAGIC 0x28586813 +#define MICROPROFILE_PRESET_HEADER_VERSION 0x00000102 +struct MicroProfilePresetHeader +{ + uint32_t nMagic; + uint32_t nVersion; + //groups, threads, aggregate, reference frame, graphs timers + uint32_t nGroups[MICROPROFILE_MAX_GROUPS]; + uint32_t nThreads[MICROPROFILE_MAX_THREADS]; + uint32_t nGraphName[MICROPROFILE_MAX_GRAPHS]; + uint32_t nGraphGroupName[MICROPROFILE_MAX_GRAPHS]; + uint32_t nAllGroupsWanted; + uint32_t nAllThreadsWanted; + uint32_t nAggregateFlip; + float fReferenceTime; + uint32_t nBars; + uint32_t nDisplay; + uint32_t nOpacityBackground; + uint32_t nOpacityForeground; + uint32_t nShowSpikes; +}; + +#ifndef MICROPROFILE_PRESET_FILENAME_FUNC +#define MICROPROFILE_PRESET_FILENAME_FUNC MicroProfilePresetFilename +static const char* MicroProfilePresetFilename(const char* pSuffix) +{ + static char filename[512]; + snprintf(filename, sizeof(filename)-1, ".microprofilepreset.%s", pSuffix); + return filename; +} +#endif + +void MicroProfileSavePreset(const char* pPresetName) +{ + std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex()); + FILE* F = fopen(MICROPROFILE_PRESET_FILENAME_FUNC(pPresetName), "wb"); + if(!F) return; + + MicroProfile& S = *MicroProfileGet(); + + MicroProfilePresetHeader Header; + memset(&Header, 0, sizeof(Header)); + Header.nAggregateFlip = S.nAggregateFlip; + Header.nBars = S.nBars; + Header.fReferenceTime = S.fReferenceTime; + Header.nAllGroupsWanted = S.nAllGroupsWanted; + Header.nAllThreadsWanted = S.nAllThreadsWanted; + Header.nMagic = MICROPROFILE_PRESET_HEADER_MAGIC; + Header.nVersion = MICROPROFILE_PRESET_HEADER_VERSION; + Header.nDisplay = S.nDisplay; + Header.nOpacityBackground = UI.nOpacityBackground; + Header.nOpacityForeground = UI.nOpacityForeground; + Header.nShowSpikes = UI.bShowSpikes ? 
1 : 0; + fwrite(&Header, sizeof(Header), 1, F); + uint64_t nMask = 1; + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + if(S.nActiveGroupWanted & nMask) + { + uint32_t offset = ftell(F); + const char* pName = S.GroupInfo[i].pName; + int nLen = (int)strlen(pName)+1; + fwrite(pName, nLen, 1, F); + Header.nGroups[i] = offset; + } + nMask <<= 1; + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + MicroProfileThreadLog* pLog = S.Pool[i]; + if(pLog && S.nThreadActive[i]) + { + uint32_t nOffset = ftell(F); + const char* pName = &pLog->ThreadName[0]; + int nLen = (int)strlen(pName)+1; + fwrite(pName, nLen, 1, F); + Header.nThreads[i] = nOffset; + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + MicroProfileToken nToken = S.Graph[i].nToken; + if(nToken != MICROPROFILE_INVALID_TOKEN) + { + uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken); + uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken); + const char* pGroupName = S.GroupInfo[nGroupIndex].pName; + const char* pTimerName = S.TimerInfo[nTimerIndex].pName; + MP_ASSERT(pGroupName); + MP_ASSERT(pTimerName); + int nGroupLen = (int)strlen(pGroupName)+1; + int nTimerLen = (int)strlen(pTimerName)+1; + + uint32_t nOffsetGroup = ftell(F); + fwrite(pGroupName, nGroupLen, 1, F); + uint32_t nOffsetTimer = ftell(F); + fwrite(pTimerName, nTimerLen, 1, F); + Header.nGraphName[i] = nOffsetTimer; + Header.nGraphGroupName[i] = nOffsetGroup; + } + } + fseek(F, 0, SEEK_SET); + fwrite(&Header, sizeof(Header), 1, F); + + fclose(F); + +} + + + +void MicroProfileLoadPreset(const char* pSuffix) +{ + std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex()); + FILE* F = fopen(MICROPROFILE_PRESET_FILENAME_FUNC(pSuffix), "rb"); + if(!F) + { + return; + } + fseek(F, 0, SEEK_END); + int nSize = ftell(F); + char* const pBuffer = (char*)alloca(nSize); + fseek(F, 0, SEEK_SET); + int nRead = (int)fread(pBuffer, nSize, 1, F); + fclose(F); + if(1 != nRead) + return; + + MicroProfile& S = 
*MicroProfileGet(); + + MicroProfilePresetHeader& Header = *(MicroProfilePresetHeader*)pBuffer; + + if(Header.nMagic != MICROPROFILE_PRESET_HEADER_MAGIC || Header.nVersion != MICROPROFILE_PRESET_HEADER_VERSION) + { + return; + } + + S.nAggregateFlip = Header.nAggregateFlip; + S.nBars = Header.nBars; + S.fReferenceTime = Header.fReferenceTime; + S.fRcpReferenceTime = 1.f / Header.fReferenceTime; + S.nAllGroupsWanted = Header.nAllGroupsWanted; + S.nAllThreadsWanted = Header.nAllThreadsWanted; + S.nDisplay = Header.nDisplay; + S.nActiveGroupWanted = 0; + UI.nOpacityBackground = Header.nOpacityBackground; + UI.nOpacityForeground = Header.nOpacityForeground; + UI.bShowSpikes = Header.nShowSpikes == 1; + + memset(&S.nThreadActive[0], 0, sizeof(S.nThreadActive)); + + for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i) + { + if(Header.nGroups[i]) + { + const char* pGroupName = pBuffer + Header.nGroups[i]; + for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j) + { + if(0 == MP_STRCASECMP(pGroupName, S.GroupInfo[j].pName)) + { + S.nActiveGroupWanted |= (1ll << j); + } + } + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i) + { + if(Header.nThreads[i]) + { + const char* pThreadName = pBuffer + Header.nThreads[i]; + for(uint32_t j = 0; j < MICROPROFILE_MAX_THREADS; ++j) + { + MicroProfileThreadLog* pLog = S.Pool[j]; + if(pLog && 0 == MP_STRCASECMP(pThreadName, &pLog->ThreadName[0])) + { + S.nThreadActive[j] = 1; + } + } + } + } + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + MicroProfileToken nPrevToken = S.Graph[i].nToken; + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + if(Header.nGraphName[i] && Header.nGraphGroupName[i]) + { + const char* pGraphName = pBuffer + Header.nGraphName[i]; + const char* pGraphGroupName = pBuffer + Header.nGraphGroupName[i]; + for(uint32_t j = 0; j < S.nTotalTimers; ++j) + { + uint64_t nGroupIndex = S.TimerInfo[j].nGroupIndex; + if(0 == MP_STRCASECMP(pGraphName, S.TimerInfo[j].pName) && 0 == 
MP_STRCASECMP(pGraphGroupName, S.GroupInfo[nGroupIndex].pName)) + { + MicroProfileToken nToken = MicroProfileMakeToken(1ll << nGroupIndex, (uint16_t)j); + S.Graph[i].nToken = nToken; // note: group index is stored here but is checked without in MicroProfileToggleGraph()! + S.TimerInfo[j].bGraph = true; + if(nToken != nPrevToken) + { + memset(&S.Graph[i].nHistory, 0, sizeof(S.Graph[i].nHistory)); + } + break; + } + } + } + } +} + +uint32_t MicroProfileCustomGroupFind(const char* pCustomName) +{ + for(uint32_t i = 0; i < UI.nCustomCount; ++i) + { + if(!MP_STRCASECMP(pCustomName, UI.Custom[i].pName)) + { + return i; + } + } + return (uint32_t)-1; +} + +uint32_t MicroProfileCustomGroup(const char* pCustomName) +{ + for(uint32_t i = 0; i < UI.nCustomCount; ++i) + { + if(!MP_STRCASECMP(pCustomName, UI.Custom[i].pName)) + { + return i; + } + } + MP_ASSERT(UI.nCustomCount < MICROPROFILE_CUSTOM_MAX); + uint32_t nIndex = UI.nCustomCount; + UI.nCustomCount++; + memset(&UI.Custom[nIndex], 0, sizeof(UI.Custom[nIndex])); + uint32_t nLen = (uint32_t)strlen(pCustomName); + if(nLen > MICROPROFILE_NAME_MAX_LEN-1) + nLen = MICROPROFILE_NAME_MAX_LEN-1; + memcpy(&UI.Custom[nIndex].pName[0], pCustomName, nLen); + UI.Custom[nIndex].pName[nLen] = '\0'; + return nIndex; +} +void MicroProfileCustomGroup(const char* pCustomName, uint32_t nMaxTimers, uint32_t nAggregateFlip, float fReferenceTime, uint32_t nFlags) +{ + uint32_t nIndex = MicroProfileCustomGroup(pCustomName); + MP_ASSERT(UI.Custom[nIndex].pTimers == 0);//only call once! 
+ UI.Custom[nIndex].pTimers = &UI.CustomTimer[UI.nCustomTimerCount]; + UI.Custom[nIndex].nMaxTimers = nMaxTimers; + UI.Custom[nIndex].fReference = fReferenceTime; + UI.nCustomTimerCount += nMaxTimers; + MP_ASSERT(UI.nCustomTimerCount <= MICROPROFILE_CUSTOM_MAX_TIMERS); //bump MICROPROFILE_CUSTOM_MAX_TIMERS + UI.Custom[nIndex].nFlags = nFlags; + UI.Custom[nIndex].nAggregateFlip = nAggregateFlip; +} + +void MicroProfileCustomGroupEnable(uint32_t nIndex) +{ + if(nIndex < UI.nCustomCount) + { + MicroProfile& S = *MicroProfileGet(); + S.nForceGroupUI = UI.Custom[nIndex].nGroupMask; + MicroProfileSetAggregateFrames(UI.Custom[nIndex].nAggregateFlip); + S.fReferenceTime = UI.Custom[nIndex].fReference; + S.fRcpReferenceTime = 1.f / UI.Custom[nIndex].fReference; + UI.nCustomActive = nIndex; + + for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i) + { + if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN) + { + uint32_t nTimerId = MicroProfileGetTimerIndex(S.Graph[i].nToken); + S.TimerInfo[nTimerId].bGraph = false; + S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN; + } + } + + for(uint32_t i = 0; i < UI.Custom[nIndex].nNumTimers; ++i) + { + if(i == MICROPROFILE_MAX_GRAPHS) + { + break; + } + S.Graph[i].nToken = UI.Custom[nIndex].pTimers[i]; + S.Graph[i].nKey = i; + uint32_t nTimerId = MicroProfileGetTimerIndex(S.Graph[i].nToken); + S.TimerInfo[nTimerId].bGraph = true; + } + } +} + +void MicroProfileCustomGroupToggle(const char* pCustomName) +{ + uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName); + if(nIndex == (uint32_t)-1 || nIndex == UI.nCustomActive) + { + MicroProfileCustomGroupDisable(); + } + else + { + MicroProfileCustomGroupEnable(nIndex); + } +} + +void MicroProfileCustomGroupEnable(const char* pCustomName) +{ + uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName); + MicroProfileCustomGroupEnable(nIndex); +} +void MicroProfileCustomGroupDisable() +{ + MicroProfile& S = *MicroProfileGet(); + S.nForceGroupUI = 0; + UI.nCustomActive = (uint32_t)-1; +} + 
+void MicroProfileCustomGroupAddTimer(const char* pCustomName, const char* pGroup, const char* pTimer) +{ + uint32_t nIndex = MicroProfileCustomGroupFind(pCustomName); + if((uint32_t)-1 == nIndex) + { + return; + } + uint32_t nTimerIndex = UI.Custom[nIndex].nNumTimers; + MP_ASSERT(nTimerIndex < UI.Custom[nIndex].nMaxTimers); + uint64_t nToken = MicroProfileFindToken(pGroup, pTimer); + MP_ASSERT(nToken != MICROPROFILE_INVALID_TOKEN); //Timer must be registered first. + UI.Custom[nIndex].pTimers[nTimerIndex] = nToken; + uint16_t nGroup = MicroProfileGetGroupIndex(nToken); + UI.Custom[nIndex].nGroupMask |= (1ll << nGroup); + UI.Custom[nIndex].nNumTimers++; +} + +#undef UI + +#endif +#endif diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index d6fcb66a5..46f4a07c9 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -6,6 +6,9 @@ #include <thread> #include <iostream> +// This needs to be included before getopt.h because the latter #defines symbols used by it +#include "common/microprofile.h" + #ifdef _MSC_VER #include <getopt.h> #else @@ -59,6 +62,8 @@ int main(int argc, char **argv) { Log::Filter log_filter(Log::Level::Debug); Log::SetFilter(&log_filter); + MicroProfileOnThreadCreate("EmuThread"); + if (boot_filename.empty()) { LOG_CRITICAL(Frontend, "Failed to load ROM: No ROM specified"); return -1; @@ -89,5 +94,7 @@ int main(int argc, char **argv) { delete emu_window; + MicroProfileShutdown(); + return 0; } diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index 0c0515054..a82e8a85b 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -18,6 +18,7 @@ set(SRCS debugger/ramview.cpp debugger/registers.cpp util/spinbox.cpp + util/util.cpp bootmanager.cpp hotkeys.cpp main.cpp @@ -42,6 +43,7 @@ set(HEADERS debugger/ramview.h debugger/registers.h util/spinbox.h + util/util.h bootmanager.h hotkeys.h main.h diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index a96fbea5f..f8aacb527 100644 
--- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -14,6 +14,7 @@ #include "common/string_util.h" #include "common/scm_rev.h" #include "common/key_map.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/settings.h" @@ -37,6 +38,8 @@ EmuThread::EmuThread(GRenderWindow* render_window) : void EmuThread::run() { render_window->MakeCurrent(); + MicroProfileOnThreadCreate("EmuThread"); + stop_run = false; // holds whether the cpu was running during the last iteration, @@ -69,6 +72,8 @@ void EmuThread::run() { } } + MicroProfileOnThreadExit(); + render_window->moveContext(); } diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp index 7424671f1..7d15028f0 100644 --- a/src/citra_qt/debugger/graphics.cpp +++ b/src/citra_qt/debugger/graphics.cpp @@ -7,6 +7,8 @@ #include <QVBoxLayout> #include <QDebug> +#include "citra_qt/util/util.h" + extern GraphicsDebugger g_debugger; GPUCommandStreamItemModel::GPUCommandStreamItemModel(QObject* parent) : QAbstractListModel(parent), command_count(0) @@ -79,7 +81,7 @@ GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) : QDockWidget(tr QListView* command_list = new QListView; command_list->setModel(command_model); - command_list->setFont(QFont("monospace")); + command_list->setFont(GetMonospaceFont()); setWidget(command_list); } diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 35a3140b2..025434687 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -14,6 +14,8 @@ #include <QSpinBox> #include <QComboBox> +#include "citra_qt/util/util.h" + #include "common/vector_math.h" #include "video_core/debug_utils/debug_utils.h" @@ -303,9 +305,7 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi list_widget = new QTreeView; list_widget->setModel(model); - QFont font("monospace"); - 
font.setStyleHint(QFont::Monospace); // Automatic fallback to a monospace font on on platforms without a font called "monospace" - list_widget->setFont(font); + list_widget->setFont(GetMonospaceFont()); list_widget->setRootIsDecorated(false); list_widget->setUniformRowHeights(true); diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 0c17edee0..1d9a00e89 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -15,6 +15,8 @@ #include <QSpinBox> #include <QTreeView> +#include "citra_qt/util/util.h" + #include "video_core/shader/shader.h" #include "graphics_vertex_shader.h" @@ -245,7 +247,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con } case Qt::FontRole: - return QFont("monospace"); + return GetMonospaceFont(); case Qt::BackgroundRole: // Highlight instructions which have no debug data associated to them diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp index 89b28c2f4..5261d4836 100644 --- a/src/citra_qt/debugger/profiler.cpp +++ b/src/citra_qt/debugger/profiler.cpp @@ -2,9 +2,21 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <QMouseEvent> +#include <QPainter> +#include <QString> + #include "profiler.h" +#include "citra_qt/util/util.h" + #include "common/profiler_reporting.h" +#include "common/microprofile.h" + +// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the +// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 
+#define MICROPROFILEUI_IMPL 1 +#include "common/microprofileui.h" using namespace Common::Profiling; @@ -136,3 +148,193 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable) update_timer.stop(); } } + +class MicroProfileWidget : public QWidget { +public: + MicroProfileWidget(QWidget* parent = 0); + +protected: + void paintEvent(QPaintEvent* ev) override; + void showEvent(QShowEvent* ev) override; + void hideEvent(QHideEvent* ev) override; + + void mouseMoveEvent(QMouseEvent* ev) override; + void mousePressEvent(QMouseEvent* ev) override; + void mouseReleaseEvent(QMouseEvent* ev) override; + void wheelEvent(QWheelEvent* ev) override; + + void keyPressEvent(QKeyEvent* ev) override; + void keyReleaseEvent(QKeyEvent* ev) override; + +private: + /// This timer is used to redraw the widget's contents continuously. To save resources, it only + /// runs while the widget is visible. + QTimer update_timer; +}; + +MicroProfileDialog::MicroProfileDialog(QWidget* parent) + : QWidget(parent, Qt::Dialog) +{ + setObjectName("MicroProfile"); + setWindowTitle(tr("MicroProfile")); + resize(1000, 600); + // Remove the "?" button from the titlebar and enable the maximize button + setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); + + MicroProfileWidget* widget = new MicroProfileWidget(this); + + QLayout* layout = new QVBoxLayout(this); + layout->setContentsMargins(0, 0, 0, 0); + layout->addWidget(widget); + setLayout(layout); + + // Configure focus so that widget is focusable and the dialog automatically forwards focus to it. 
+ setFocusProxy(widget); + widget->setFocusPolicy(Qt::StrongFocus); + widget->setFocus(); +} + +QAction* MicroProfileDialog::toggleViewAction() { + if (toggle_view_action == nullptr) { + toggle_view_action = new QAction(windowTitle(), this); + toggle_view_action->setCheckable(true); + toggle_view_action->setChecked(isVisible()); + connect(toggle_view_action, SIGNAL(toggled(bool)), SLOT(setVisible(bool))); + } + + return toggle_view_action; +} + +void MicroProfileDialog::showEvent(QShowEvent* ev) { + if (toggle_view_action) { + toggle_view_action->setChecked(isVisible()); + } + QWidget::showEvent(ev); +} + +void MicroProfileDialog::hideEvent(QHideEvent* ev) { + if (toggle_view_action) { + toggle_view_action->setChecked(isVisible()); + } + QWidget::hideEvent(ev); +} + +/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the +/// QPainter available inside the drawing callbacks. +static QPainter* mp_painter = nullptr; + +MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) { + // Send mouse motion events even when not dragging. 
+ setMouseTracking(true); + + MicroProfileSetDisplayMode(1); // Timers screen + MicroProfileInitUI(); + + connect(&update_timer, SIGNAL(timeout()), SLOT(update())); +} + +void MicroProfileWidget::paintEvent(QPaintEvent* ev) { + QPainter painter(this); + + painter.setBackground(Qt::black); + painter.eraseRect(rect()); + + QFont font = GetMonospaceFont(); + font.setPixelSize(MICROPROFILE_TEXT_HEIGHT); + painter.setFont(font); + + mp_painter = &painter; + MicroProfileDraw(rect().width(), rect().height()); + mp_painter = nullptr; +} + +void MicroProfileWidget::showEvent(QShowEvent* ev) { + update_timer.start(15); // ~60 Hz + QWidget::showEvent(ev); +} + +void MicroProfileWidget::hideEvent(QHideEvent* ev) { + update_timer.stop(); + QWidget::hideEvent(ev); +} + +void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { + MicroProfileMousePosition(ev->x(), ev->y(), 0); + ev->accept(); +} + +void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { + MicroProfileMousePosition(ev->x(), ev->y(), 0); + MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); + ev->accept(); +} + +void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { + MicroProfileMousePosition(ev->x(), ev->y(), 0); + MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); + ev->accept(); +} + +void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { + MicroProfileMousePosition(ev->x(), ev->y(), ev->delta() / 120); + ev->accept(); +} + +void MicroProfileWidget::keyPressEvent(QKeyEvent* ev) { + if (ev->key() == Qt::Key_Control) { + // Inform MicroProfile that the user is holding Ctrl. + MicroProfileModKey(1); + } + QWidget::keyPressEvent(ev); +} + +void MicroProfileWidget::keyReleaseEvent(QKeyEvent* ev) { + if (ev->key() == Qt::Key_Control) { + MicroProfileModKey(0); + } + QWidget::keyReleaseEvent(ev); +} + +// These functions are called by MicroProfileDraw to draw the interface elements on the screen. 
+ +void MicroProfileDrawText(int x, int y, u32 hex_color, const char* text, u32 text_length) { + // hex_color does not include an alpha, so it must be assumed to be 255 + mp_painter->setPen(QColor::fromRgb(hex_color)); + + // It's impossible to draw a string using a monospaced font with a fixed width per cell in a + // way that's reliable across different platforms and fonts as far as I (yuriks) can tell, so + // draw each character individually in order to precisely control the text advance. + for (u32 i = 0; i < text_length; ++i) { + // Position the text baseline 1 pixel above the bottom of the text cell, this gives nice + // vertical alignment of text for a wide range of tested fonts. + mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QChar(text[i])); + x += MICROPROFILE_TEXT_WIDTH + 1; + } +} + +void MicroProfileDrawBox(int left, int top, int right, int bottom, u32 hex_color, MicroProfileBoxType type) { + QColor color = QColor::fromRgba(hex_color); + QBrush brush = color; + if (type == MicroProfileBoxTypeBar) { + QLinearGradient gradient(left, top, left, bottom); + gradient.setColorAt(0.f, color.lighter(125)); + gradient.setColorAt(1.f, color.darker(125)); + brush = gradient; + } + mp_painter->fillRect(left, top, right - left, bottom - top, brush); +} + +void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color) { + // Temporary vector used to convert between the float array and QPointF. Marked static to reuse + // the allocation across calls. 
+ static std::vector<QPointF> point_buf; + + for (u32 i = 0; i < vertices_length; ++i) { + point_buf.emplace_back(vertices[i*2 + 0], vertices[i*2 + 1]); + } + + // hex_color does not include an alpha, so it must be assumed to be 255 + mp_painter->setPen(QColor::fromRgb(hex_color)); + mp_painter->drawPolyline(point_buf.data(), vertices_length); + point_buf.clear(); +} diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h index fabf279b8..2199eaef1 100644 --- a/src/citra_qt/debugger/profiler.h +++ b/src/citra_qt/debugger/profiler.h @@ -48,3 +48,20 @@ private: QTimer update_timer; }; + +class MicroProfileDialog : public QWidget { + Q_OBJECT + +public: + MicroProfileDialog(QWidget* parent = 0); + + /// Returns a QAction that can be used to toggle visibility of this dialog. + QAction* toggleViewAction(); + +protected: + void showEvent(QShowEvent* ev) override; + void hideEvent(QHideEvent* ev) override; + +private: + QAction* toggle_view_action = nullptr; +}; diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index a1a4865bd..7fb1b0dcb 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -17,6 +17,7 @@ #include "common/logging/backend.h" #include "common/logging/filter.h" #include "common/make_unique.h" +#include "common/microprofile.h" #include "common/platform.h" #include "common/scm_rev.h" #include "common/scope_exit.h" @@ -64,6 +65,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr) addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); profilerWidget->hide(); + microProfileDialog = new MicroProfileDialog(this); + microProfileDialog->hide(); + disasmWidget = new DisassemblerWidget(this, emu_thread.get()); addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); disasmWidget->hide(); @@ -102,6 +106,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr) QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); debug_menu->addAction(profilerWidget->toggleViewAction()); + 
debug_menu->addAction(microProfileDialog->toggleViewAction()); debug_menu->addAction(disasmWidget->toggleViewAction()); debug_menu->addAction(registersWidget->toggleViewAction()); debug_menu->addAction(callstackWidget->toggleViewAction()); @@ -128,6 +133,8 @@ GMainWindow::GMainWindow() : emu_thread(nullptr) restoreGeometry(settings.value("geometry").toByteArray()); restoreState(settings.value("state").toByteArray()); render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray()); + microProfileDialog->restoreGeometry(settings.value("microProfileDialogGeometry").toByteArray()); + microProfileDialog->setVisible(settings.value("microProfileDialogVisible").toBool()); ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer); SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked()); @@ -287,6 +294,17 @@ void GMainWindow::ShutdownGame() { render_window->hide(); } +void GMainWindow::StoreRecentFile(const QString& filename) +{ + QSettings settings; + QStringList recent_files = settings.value("recentFiles").toStringList(); + recent_files.prepend(filename); + recent_files.removeDuplicates(); + settings.setValue("recentFiles", recent_files); + + UpdateRecentFiles(); +} + void GMainWindow::UpdateRecentFiles() { QSettings settings; QStringList recent_files = settings.value("recentFiles").toStringList(); @@ -297,6 +315,7 @@ void GMainWindow::UpdateRecentFiles() { QString text = QString("&%1. 
%2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName()); actions_recent_files[i]->setText(text); actions_recent_files[i]->setData(recent_files[i]); + actions_recent_files[i]->setToolTip(recent_files[i]); actions_recent_files[i]->setVisible(true); } @@ -319,11 +338,7 @@ void GMainWindow::OnMenuLoadFile() { QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)")); if (filename.size()) { settings.setValue("romsPath", QFileInfo(filename).path()); - // Update recent files list - QStringList recent_files = settings.value("recentFiles").toStringList(); - recent_files.prepend(filename); - settings.setValue("recentFiles", recent_files); - UpdateRecentFiles(); // Update UI + StoreRecentFile(filename); BootGame(filename.toLatin1().data()); } @@ -349,6 +364,7 @@ void GMainWindow::OnMenuRecentFile() { QFileInfo file_info(filename); if (file_info.exists()) { BootGame(filename.toLatin1().data()); + StoreRecentFile(filename); // Put the filename on top of the list } else { // Display an error message and remove the file from the list. 
QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); @@ -357,12 +373,7 @@ void GMainWindow::OnMenuRecentFile() { QStringList recent_files = settings.value("recentFiles").toStringList(); recent_files.removeOne(filename); settings.setValue("recentFiles", recent_files); - - action->setVisible(false); - // Grey out the recent files menu if the list is empty - if (ui.menu_recent_files->isEmpty()) { - ui.menu_recent_files->setEnabled(false); - } + UpdateRecentFiles(); } } @@ -430,6 +441,8 @@ void GMainWindow::closeEvent(QCloseEvent* event) { settings.setValue("geometry", saveGeometry()); settings.setValue("state", saveState()); settings.setValue("geometryRenderWindow", render_window->saveGeometry()); + settings.setValue("microProfileDialogGeometry", microProfileDialog->saveGeometry()); + settings.setValue("microProfileDialogVisible", microProfileDialog->isVisible()); settings.setValue("singleWindowMode", ui.action_Single_Window_Mode->isChecked()); settings.setValue("displayTitleBars", ui.actionDisplay_widget_title_bars->isChecked()); settings.setValue("firstStart", false); @@ -452,6 +465,11 @@ int main(int argc, char* argv[]) { Log::Filter log_filter(Log::Level::Info); Log::SetFilter(&log_filter); + MicroProfileOnThreadCreate("Frontend"); + SCOPE_EXIT({ + MicroProfileShutdown(); + }); + // Init settings params QSettings::setDefaultFormat(QSettings::IniFormat); QCoreApplication::setOrganizationName("Citra team"); diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index 4b260ae8b..32523fded 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h @@ -14,6 +14,7 @@ class GImageInfo; class GRenderWindow; class EmuThread; class ProfilerWidget; +class MicroProfileDialog; class DisassemblerWidget; class RegistersWidget; class CallstackWidget; @@ -60,6 +61,24 @@ private: void BootGame(const std::string& filename); void ShutdownGame(); + /** + * Stores the filename in the recently loaded files list. 
+ * The new filename is stored at the beginning of the recently loaded files list. + * After inserting the new entry, duplicates are removed meaning that if + * this was inserted from \a OnMenuRecentFile(), the entry will be put on top + * and removed from its previous position. + * + * Finally, this function calls \a UpdateRecentFiles() to update the UI. + * + * @param filename the filename to store + */ + void StoreRecentFile(const QString& filename); + + /** + * Updates the recent files menu. + * Menu entries are rebuilt from the configuration file. + * If there is no entry in the menu, the menu is greyed out. + */ void UpdateRecentFiles(); void closeEvent(QCloseEvent* event) override; @@ -86,6 +105,7 @@ private: std::unique_ptr<EmuThread> emu_thread; ProfilerWidget* profilerWidget; + MicroProfileDialog* microProfileDialog; DisassemblerWidget* disasmWidget; RegistersWidget* registersWidget; CallstackWidget* callstackWidget; diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp new file mode 100644 index 000000000..2cb939af1 --- /dev/null +++ b/src/citra_qt/util/util.cpp @@ -0,0 +1,13 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "util.h" + +QFont GetMonospaceFont() { + QFont font("monospace"); + // Automatic fallback to a monospace font on platforms without a font called "monospace" + font.setStyleHint(QFont::Monospace); + font.setFixedPitch(true); + return font; +} diff --git a/src/citra_qt/util/util.h b/src/citra_qt/util/util.h new file mode 100644 index 000000000..98a944047 --- /dev/null +++ b/src/citra_qt/util/util.h @@ -0,0 +1,10 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <QFont> + +/// Returns a QFont object appropriate to use as a monospace font for debugging widgets, etc.
+QFont GetMonospaceFont(); diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e743a026d..7f3712efa 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -11,6 +11,7 @@ set(SRCS logging/text_formatter.cpp logging/backend.cpp memory_util.cpp + microprofile.cpp misc.cpp profiler.cpp scm_rev.cpp @@ -43,6 +44,8 @@ set(HEADERS make_unique.h math_util.h memory_util.h + microprofile.h + microprofileui.h platform.h profiler.h profiler_reporting.h diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 88e452a16..ed20c3629 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -45,14 +45,20 @@ // GCC 4.8 defines all the rotate functions now // Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit -#ifndef _rotl -inline u32 _rotl(u32 x, int shift) { +#ifdef _rotl +#define rotl _rotl +#else +inline u32 rotl(u32 x, int shift) { shift &= 31; if (!shift) return x; return (x << shift) | (x >> (32 - shift)); } +#endif -inline u32 _rotr(u32 x, int shift) { +#ifdef _rotr +#define rotr _rotr +#else +inline u32 rotr(u32 x, int shift) { shift &= 31; if (!shift) return x; return (x >> shift) | (x << (32 - shift)); diff --git a/src/common/file_util.h b/src/common/file_util.h index d0dccdf69..e71a9b2fa 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h @@ -244,7 +244,7 @@ private: template <typename T> void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) { -#ifdef _WIN32 +#ifdef _MSC_VER fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode); #else fstream.open(filename.c_str(), openmode); diff --git a/src/common/logging/log.h b/src/common/logging/log.h index e16dde7fc..5fd3bd7f5 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level, } // namespace Log #define LOG_GENERIC(log_class, log_level, ...) 
\ - ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \ - __FILE__, __LINE__, __func__, __VA_ARGS__) + ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__) #ifdef _DEBUG -#define LOG_TRACE( log_class, ...) LOG_GENERIC(log_class, Trace, __VA_ARGS__) +#define LOG_TRACE( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace, __VA_ARGS__) #else #define LOG_TRACE( log_class, ...) (void(0)) #endif -#define LOG_DEBUG( log_class, ...) LOG_GENERIC(log_class, Debug, __VA_ARGS__) -#define LOG_INFO( log_class, ...) LOG_GENERIC(log_class, Info, __VA_ARGS__) -#define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning, __VA_ARGS__) -#define LOG_ERROR( log_class, ...) LOG_GENERIC(log_class, Error, __VA_ARGS__) -#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__) +#define LOG_DEBUG( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug, __VA_ARGS__) +#define LOG_INFO( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info, __VA_ARGS__) +#define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning, __VA_ARGS__) +#define LOG_ERROR( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error, __VA_ARGS__) +#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__) diff --git a/src/common/microprofile.cpp b/src/common/microprofile.cpp new file mode 100644 index 000000000..ee25dd37f --- /dev/null +++ b/src/common/microprofile.cpp @@ -0,0 +1,7 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +// Includes the MicroProfile implementation in this file for compilation +#define MICROPROFILE_IMPL 1 +#include "common/microprofile.h" diff --git a/src/common/microprofile.h b/src/common/microprofile.h new file mode 100644 index 000000000..9eb6016a8 --- /dev/null +++ b/src/common/microprofile.h @@ -0,0 +1,25 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +// Customized Citra settings. +// This file wraps the MicroProfile header so that these are consistent everywhere. +#define MICROPROFILE_WEBSERVER 0 +#define MICROPROFILE_GPU_TIMERS 0 // TODO: Implement timer queries when we upgrade to OpenGL 3.3 +#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0 +#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<12) // 8 MB + +#include <microprofile.h> + +#define MP_RGB(r, g, b) ((r) << 16 | (g) << 8 | (b) << 0) + +// On OS X, some Mach header included by MicroProfile defines these as macros, conflicting with +// identifiers we use. +#ifdef PAGE_SIZE +#undef PAGE_SIZE +#endif +#ifdef PAGE_MASK +#undef PAGE_MASK +#endif diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h new file mode 100644 index 000000000..97c369bd9 --- /dev/null +++ b/src/common/microprofileui.h @@ -0,0 +1,16 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/microprofile.h" + +// Customized Citra settings. +// This file wraps the MicroProfile header so that these are consistent everywhere. 
+#define MICROPROFILE_TEXT_WIDTH 6 +#define MICROPROFILE_TEXT_HEIGHT 12 +#define MICROPROFILE_HELP_ALT "Right-Click" +#define MICROPROFILE_HELP_MOD "Ctrl" + +#include <microprofileui.h> diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 4b79acd1f..939df210e 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -15,6 +15,7 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include <cinttypes> #include <cstring> #include "common/assert.h" @@ -25,11 +26,6 @@ #include "cpu_detect.h" #include "emitter.h" -#define PRIx64 "llx" - -// Minimize the diff against Dolphin -#define DYNA_REC JIT - namespace Gen { @@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr() return code; } +void XEmitter::Write8(u8 value) +{ + *code++ = value; +} + +void XEmitter::Write16(u16 value) +{ + std::memcpy(code, &value, sizeof(u16)); + code += sizeof(u16); +} + +void XEmitter::Write32(u32 value) +{ + std::memcpy(code, &value, sizeof(u32)); + code += sizeof(u32); +} + +void XEmitter::Write64(u64 value) +{ + std::memcpy(code, &value, sizeof(u64)); + code += sizeof(u64); +} + void XEmitter::ReserveCodeSpace(int bytes) { for (int i = 0; i < bytes; i++) @@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b) Write8(rx); } -void XEmitter::JMP(const u8 *addr, bool force5Bytes) +void XEmitter::JMP(const u8* addr, bool force5Bytes) { u64 fn = (u64)addr; if (!force5Bytes) @@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) } } -void XEmitter::JMPptr(const OpArg &arg2) +void XEmitter::JMPptr(const OpArg& arg2) { OpArg arg = arg2; if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); @@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg) arg.WriteRest(this); } -void XEmitter::CALL(const void *fnptr) +void XEmitter::CALL(const void* fnptr) { u64 distance = u64(fnptr) - (u64(code) + 5); ASSERT_MSG( @@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const 
u8* addr, bool force5bytes) } } -void XEmitter::SetJumpTarget(const FixupBranch &branch) +void XEmitter::SetJumpTarget(const FixupBranch& branch) { if (branch.type == 0) { @@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch) } } -// INC/DEC considered harmful on newer CPUs due to partial flag set. -// Use ADD, SUB instead. - -/* -void XEmitter::INC(int bits, OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument"); - arg.operandReg = 0; - if (bits == 16) {Write8(0x66);} - arg.WriteRex(this, bits, bits); - Write8(bits == 8 ? 0xFE : 0xFF); - arg.WriteRest(this); -} -void XEmitter::DEC(int bits, OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument"); - arg.operandReg = 1; - if (bits == 16) {Write8(0x66);} - arg.WriteRex(this, bits, bits); - Write8(bits == 8 ? 0xFE : 0xFF); - arg.WriteRest(this); -} -*/ - //Single byte opcodes //There is no PUSHAD/POPAD in 64-bit mode. void XEmitter::INT3() {Write8(0xCC);} @@ -667,7 +662,7 @@ void XEmitter::CBW(int bits) void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} -void XEmitter::PUSH(int bits, const OpArg &reg) +void XEmitter::PUSH(int bits, const OpArg& reg) { if (reg.IsSimpleReg()) PUSH(reg.GetSimpleReg()); @@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg) } } -void XEmitter::POP(int /*bits*/, const OpArg &reg) +void XEmitter::POP(int /*bits*/, const OpArg& reg) { if (reg.IsSimpleReg()) POP(reg.GetSimpleReg()); @@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) src.WriteRest(this); } -void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} -void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} -void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} -void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} -void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} -void 
XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} +void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);} +void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);} +void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);} +void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);} +void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);} +void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);} void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { @@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo src.WriteRest(this); } -void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) +void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) { if (bits <= 16) ASSERT_MSG(0, "MOVNTI - bits<=16"); WriteBitSearchType(bits, src, dest, 0xC3); } -void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit +void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit +void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit -void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) +void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) { CheckFlags(); if (!Common::GetCPUCaps().bmi1) ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. 
Bad programmer."); WriteBitSearchType(bits, dest, src, 0xBC, true); } -void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) +void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) { CheckFlags(); if (!Common::GetCPUCaps().lzcnt) @@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src) } //shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) +void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) { CheckFlags(); bool writeImm = false; @@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) // large rotates and shift are slower on intel than amd // intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} -void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} -void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} -void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} -void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} -void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} -void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} +void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);} +void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);} +void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);} +void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);} +void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);} +void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, 
shift, 5);} +void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);} // index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) +void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) { CheckFlags(); if (dest.IsImm()) @@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) } } -void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} -void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} -void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} -void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} +void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);} +void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);} +void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);} +void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);} //shift can be either imm8 or cl -void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) +void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { CheckFlags(); if (dest.IsImm()) @@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) } } -void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) +void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { CheckFlags(); if (dest.IsImm()) @@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit } //operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter *emit, 
bool toRM, NormalOp op, const OpArg &operand, int bits) const +void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const { X64Reg _operandReg; if (IsImm()) @@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o } } -void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) +void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2) { if (a1.IsImm()) { @@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg } } -void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} -void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} -void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} -void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} -void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} -void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} -void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} -void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) +void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} +void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} +void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} +void XEmitter::SBB (int bits, const OpArg& a1, 
const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} +void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} +void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} +void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} +void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2) { if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); WriteNormalOp(this, bits, nrmMOV, a1, a2); } -void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} -void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} -void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} +void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} +void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} +void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) { CheckFlags(); if (bits == 8) @@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) } } -void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) { CheckFlags(); if (bits == 8) @@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr arg.WriteRest(this, extrabytes); } -void 
XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); } @@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op) // Currently, only 0x38 and 0x3A are used as secondary escape byte. if ((op >> 8) == 0x3A) return 3; - else if ((op >> 8) == 0x38) + if ((op >> 8) == 0x38) return 2; - else - return 1; + + return 1; } static int GetVEXpp(u8 opPrefix) { if (opPrefix == 0x66) return 1; - else if (opPrefix == 0xF3) + if (opPrefix == 0xF3) return 2; - else if (opPrefix == 0xF2) + if (opPrefix == 0xF2) return 3; - else - return 0; + + return 0; } -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) { if (!Common::GetCPUCaps().avx) ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. 
Bad programmer."); @@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA } // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) { if (size != 32 && size != 64) ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); @@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r arg.WriteRest(this, extrabytes, regOp1); } -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) { CheckFlags(); if (!Common::GetCPUCaps().bmi1) @@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); } -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) { CheckFlags(); if (!Common::GetCPUCaps().bmi2) @@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext) arg.WriteRest(this); } -void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} -void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} - -void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} -void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} -void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} - -void XEmitter::ADDSS(X64Reg regOp, OpArg arg) 
{WriteSSEOp(0xF3, sseADD, regOp, arg);} -void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} -void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} -void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} -void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} -void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} -void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} -void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} -void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} -void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} -void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} -void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} -void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} -void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} -void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} - -void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} -void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} -void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} -void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} -void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} -void 
XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} -void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} -void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} -void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} -void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} -void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} -void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} -void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} -void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} -void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} -void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} -void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} -void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} -void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} -void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} -void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} -void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} -void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} -void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } -void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} -void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} -void 
XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} - -void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} - -void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered -void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered -void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} - -void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} -void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} - -void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} -void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} - -void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSS(OpArg arg, 
X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } -void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } - -void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } -void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } +void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);} +void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);} + +void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} +void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} +void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} + +void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} +void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} +void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} +void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} +void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); 
Write8(compare);} +void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} +void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} +void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} +void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} +void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} +void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} +void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} +void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} +void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} +void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} +void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);} +void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} + +void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} +void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} +void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} +void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} +void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} +void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} +void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} +void 
XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} +void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} +void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} +void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} +void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} +void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} +void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} +void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} +void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} +void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} +void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} +void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} +void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} +void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} +void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} +void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} +void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } +void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} +void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} +void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} + +void XEmitter::HADDPS(X64Reg regOp, const 
OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} + +void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed +void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered +void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered +void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} + +void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} +void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} +void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} + +void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} +void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} +void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} + +void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} +void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) 
{WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} +void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} + +void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } +void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } +void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } + +void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } +void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } +void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} -void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} -void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} +void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} +void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} -void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} -void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} -void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} 
-void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} +void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} +void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} +void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} +void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} +void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} +void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} -void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} -void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} -void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} -void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} +void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} +void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} +void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} +void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} -void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} -void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} -void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} -void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} +void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} +void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} +void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) 
{WriteSSEOp(0xF3, 0x5B, regOp, arg);} +void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} -void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} -void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} +void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);} +void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);} -void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only +void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only // THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} -void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} +void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} +void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} -void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} -void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} +void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} +void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} -void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) +void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { if (Common::GetCPUCaps().sse3) { @@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) //There are a few more left // Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} -void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) 
{WriteSSEOp(0x66, 0x63, dest, arg);} -void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} +void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} +void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);} +void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);} void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} @@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift) Write8(shift); } -void XEmitter::PSRLQ(X64Reg reg, OpArg arg) +void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) { WriteSSEOp(0x66, 0xd3, reg, arg); } @@ -1735,212 +1731,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift) Write8(shift); } -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { if (!Common::GetCPUCaps().ssse3) ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); } -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) +void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { if (!Common::GetCPUCaps().sse4_1) ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. 
Bad programmer."); WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); } -void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} -void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} -void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} -void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} - -void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} -void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} -void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} -void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} -void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} -void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} -void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} -void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} - -void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} -void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} -void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} -void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} -void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} -void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} -void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} -void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} -void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} -void XEmitter::PMOVZXWD(X64Reg 
dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} -void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} -void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} - -void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} -void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} -void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} +void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} +void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} +void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} +void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} + +void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} +void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} +void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} +void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} +void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} +void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} +void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} +void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} + +void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} +void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} +void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} +void 
XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} +void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} +void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} +void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} +void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} +void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} +void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} +void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} +void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} + +void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} +void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} +void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } -void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} +void 
XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} +void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} -void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} -void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} -void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} -void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} +void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} +void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} +void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} +void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} -void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} -void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} -void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} -void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} +void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} +void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} +void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} +void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} -void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} -void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} -void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} -void 
XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} +void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} +void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);} +void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} +void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} -void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} -void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} -void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} -void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} +void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} +void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} +void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} +void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} -void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} -void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} -void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} -void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} +void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} +void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} +void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} +void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} -void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} -void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, 
arg);} +void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} +void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} -void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} -void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} -void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} +void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);} +void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);} +void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);} -void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} -void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} -void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} +void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);} +void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);} +void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);} -void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} -void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} +void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} +void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} -void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } -void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} +void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } +void 
XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} -void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } -void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } -void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } -void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } +void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } +void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } +void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } +void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } -void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } -void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } +void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} // VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg 
arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} -void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } -void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 
sseXOR, regOp1, regOp2, arg); } - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, 
OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, 
arg, 1); } -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } -void 
XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } - -void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} -void 
XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} -void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} -void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} -void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} -void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} +void 
XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} +void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} +void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} +void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} +void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} +void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} +void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} + +void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } +void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } +void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } +void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } +void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } +void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } + +void XEmitter::VPAND(X64Reg 
regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } +void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } +void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } +void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } + +void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } +void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } +void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } +void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } +void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } +void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } +void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } +void 
XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } +void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } +void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { 
WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } +void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } +void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } 
+void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } +void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } +void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } +void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } +void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD213PD(X64Reg 
regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } +void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } + +void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} +void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} +void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} +void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} +void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} // Prefixes @@ -1956,7 +1952,7 
@@ void XEmitter::FWAIT() } // TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) { int mf = 0; ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); @@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a arg.WriteRest(this, 0, (X64Reg) op); } -void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} -void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} -void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} +void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} +void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} +void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index e9c924126..a49cd2cf1 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -328,8 +328,6 @@ enum SSECompare ORD, }; -typedef const u8* JumpTarget; - class XEmitter { friend struct OpArg; // for Write8 etc @@ -344,27 +342,27 @@ private: void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); void WriteMulDivType(int bits, OpArg src, int ext); void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); - void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); - void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); + void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); + void WriteBitTest(int bits, 
const OpArg& dest, const OpArg& index, int ext); void WriteMXCSR(OpArg arg, int ext); void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); - void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); + void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); void ABI_CalculateFrameSize(u32 mask, 
size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); protected: - inline void Write8(u8 value) {*code++ = value;} - inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} - inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} - inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} + void Write8(u8 value); + void Write16(u16 value); + void Write32(u32 value); + void Write64(u64 value); public: XEmitter() { code = nullptr; flags_locked = false; } @@ -413,8 +411,8 @@ public: // Stack control void PUSH(X64Reg reg); void POP(X64Reg reg); - void PUSH(int bits, const OpArg &reg); - void POP(int bits, const OpArg &reg); + void PUSH(int bits, const OpArg& reg); + void POP(int bits, const OpArg& reg); void PUSHF(); void POPF(); @@ -424,21 +422,19 @@ public: void UD2(); FixupBranch J(bool force5bytes = false); - void JMP(const u8 * addr, bool force5Bytes = false); - void JMP(OpArg arg); - void JMPptr(const OpArg &arg); + void JMP(const u8* addr, bool force5Bytes = false); + void JMPptr(const OpArg& arg); void JMPself(); //infinite loop! #ifdef CALL #undef CALL #endif - void CALL(const void *fnptr); + void CALL(const void* fnptr); void CALLptr(OpArg arg); FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); - //void J_CC(CCFlags conditionCode, JumpTarget target); - void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); + void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); - void SetJumpTarget(const FixupBranch &branch); + void SetJumpTarget(const FixupBranch& branch); void SETcc(CCFlags flag, OpArg dest); // Note: CMOV brings small if any benefit on current cpus. 
@@ -450,8 +446,8 @@ public: void SFENCE(); // Bit scan - void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit - void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit + void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit + void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit // Cache control enum PrefetchLevel @@ -462,67 +458,67 @@ public: PF_T2, //Levels 3+ (aliased to T0 on AMD) }; void PREFETCH(PrefetchLevel level, OpArg arg); - void MOVNTI(int bits, OpArg dest, X64Reg src); - void MOVNTDQ(OpArg arg, X64Reg regOp); - void MOVNTPS(OpArg arg, X64Reg regOp); - void MOVNTPD(OpArg arg, X64Reg regOp); + void MOVNTI(int bits, const OpArg& dest, X64Reg src); + void MOVNTDQ(const OpArg& arg, X64Reg regOp); + void MOVNTPS(const OpArg& arg, X64Reg regOp); + void MOVNTPD(const OpArg& arg, X64Reg regOp); // Multiplication / division - void MUL(int bits, OpArg src); //UNSIGNED - void IMUL(int bits, OpArg src); //SIGNED - void IMUL(int bits, X64Reg regOp, OpArg src); - void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); - void DIV(int bits, OpArg src); - void IDIV(int bits, OpArg src); + void MUL(int bits, const OpArg& src); //UNSIGNED + void IMUL(int bits, const OpArg& src); //SIGNED + void IMUL(int bits, X64Reg regOp, const OpArg& src); + void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); + void DIV(int bits, const OpArg& src); + void IDIV(int bits, const OpArg& src); // Shift - void ROL(int bits, OpArg dest, OpArg shift); - void ROR(int bits, OpArg dest, OpArg shift); - void RCL(int bits, OpArg dest, OpArg shift); - void RCR(int bits, OpArg dest, OpArg shift); - void SHL(int bits, OpArg dest, OpArg shift); - void SHR(int bits, OpArg dest, OpArg shift); - void SAR(int bits, OpArg dest, OpArg shift); + void ROL(int bits, const OpArg& dest, const OpArg& shift); + void ROR(int bits, const OpArg& dest, const OpArg& shift); + void RCL(int bits, const OpArg& dest, const OpArg& 
shift); + void RCR(int bits, const OpArg& dest, const OpArg& shift); + void SHL(int bits, const OpArg& dest, const OpArg& shift); + void SHR(int bits, const OpArg& dest, const OpArg& shift); + void SAR(int bits, const OpArg& dest, const OpArg& shift); // Bit Test - void BT(int bits, OpArg dest, OpArg index); - void BTS(int bits, OpArg dest, OpArg index); - void BTR(int bits, OpArg dest, OpArg index); - void BTC(int bits, OpArg dest, OpArg index); + void BT(int bits, const OpArg& dest, const OpArg& index); + void BTS(int bits, const OpArg& dest, const OpArg& index); + void BTR(int bits, const OpArg& dest, const OpArg& index); + void BTC(int bits, const OpArg& dest, const OpArg& index); // Double-Precision Shift - void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); - void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); + void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); + void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); // Extend EAX into EDX in various ways void CWD(int bits = 16); - inline void CDQ() {CWD(32);} - inline void CQO() {CWD(64);} + void CDQ() {CWD(32);} + void CQO() {CWD(64);} void CBW(int bits = 8); - inline void CWDE() {CBW(16);} - inline void CDQE() {CBW(32);} + void CWDE() {CBW(16);} + void CDQE() {CBW(32);} // Load effective address void LEA(int bits, X64Reg dest, OpArg src); // Integer arithmetic - void NEG (int bits, OpArg src); - void ADD (int bits, const OpArg &a1, const OpArg &a2); - void ADC (int bits, const OpArg &a1, const OpArg &a2); - void SUB (int bits, const OpArg &a1, const OpArg &a2); - void SBB (int bits, const OpArg &a1, const OpArg &a2); - void AND (int bits, const OpArg &a1, const OpArg &a2); - void CMP (int bits, const OpArg &a1, const OpArg &a2); + void NEG(int bits, const OpArg& src); + void ADD(int bits, const OpArg& a1, const OpArg& a2); + void ADC(int bits, const OpArg& a1, const OpArg& a2); + void SUB(int bits, const OpArg& a1, const OpArg& a2); + void 
SBB(int bits, const OpArg& a1, const OpArg& a2); + void AND(int bits, const OpArg& a1, const OpArg& a2); + void CMP(int bits, const OpArg& a1, const OpArg& a2); // Bit operations - void NOT (int bits, OpArg src); - void OR (int bits, const OpArg &a1, const OpArg &a2); - void XOR (int bits, const OpArg &a1, const OpArg &a2); - void MOV (int bits, const OpArg &a1, const OpArg &a2); - void TEST(int bits, const OpArg &a1, const OpArg &a2); + void NOT (int bits, const OpArg& src); + void OR(int bits, const OpArg& a1, const OpArg& a2); + void XOR(int bits, const OpArg& a1, const OpArg& a2); + void MOV(int bits, const OpArg& a1, const OpArg& a2); + void TEST(int bits, const OpArg& a1, const OpArg& a2); // Are these useful at all? Consider removing. - void XCHG(int bits, const OpArg &a1, const OpArg &a2); + void XCHG(int bits, const OpArg& a1, const OpArg& a2); void XCHG_AHAL(); // Byte swapping (32 and 64-bit only). @@ -536,13 +532,13 @@ public: void MOVBE(int dbits, const OpArg& dest, const OpArg& src); // Available only on AMD >= Phenom or Intel >= Haswell - void LZCNT(int bits, X64Reg dest, OpArg src); + void LZCNT(int bits, X64Reg dest, const OpArg& src); // Note: this one is actually part of BMI1 - void TZCNT(int bits, X64Reg dest, OpArg src); + void TZCNT(int bits, X64Reg dest, const OpArg& src); // WARNING - These two take 11-13 cycles and are VectorPath! 
(AMD64) - void STMXCSR(OpArg memloc); - void LDMXCSR(OpArg memloc); + void STMXCSR(const OpArg& memloc); + void LDMXCSR(const OpArg& memloc); // Prefixes void LOCK(); @@ -569,259 +565,243 @@ public: x87_FPUBusy = 0x8000, }; - void FLD(int bits, OpArg src); - void FST(int bits, OpArg dest); - void FSTP(int bits, OpArg dest); + void FLD(int bits, const OpArg& src); + void FST(int bits, const OpArg& dest); + void FSTP(int bits, const OpArg& dest); void FNSTSW_AX(); void FWAIT(); // SSE/SSE2: Floating point arithmetic - void ADDSS(X64Reg regOp, OpArg arg); - void ADDSD(X64Reg regOp, OpArg arg); - void SUBSS(X64Reg regOp, OpArg arg); - void SUBSD(X64Reg regOp, OpArg arg); - void MULSS(X64Reg regOp, OpArg arg); - void MULSD(X64Reg regOp, OpArg arg); - void DIVSS(X64Reg regOp, OpArg arg); - void DIVSD(X64Reg regOp, OpArg arg); - void MINSS(X64Reg regOp, OpArg arg); - void MINSD(X64Reg regOp, OpArg arg); - void MAXSS(X64Reg regOp, OpArg arg); - void MAXSD(X64Reg regOp, OpArg arg); - void SQRTSS(X64Reg regOp, OpArg arg); - void SQRTSD(X64Reg regOp, OpArg arg); - void RSQRTSS(X64Reg regOp, OpArg arg); + void ADDSS(X64Reg regOp, const OpArg& arg); + void ADDSD(X64Reg regOp, const OpArg& arg); + void SUBSS(X64Reg regOp, const OpArg& arg); + void SUBSD(X64Reg regOp, const OpArg& arg); + void MULSS(X64Reg regOp, const OpArg& arg); + void MULSD(X64Reg regOp, const OpArg& arg); + void DIVSS(X64Reg regOp, const OpArg& arg); + void DIVSD(X64Reg regOp, const OpArg& arg); + void MINSS(X64Reg regOp, const OpArg& arg); + void MINSD(X64Reg regOp, const OpArg& arg); + void MAXSS(X64Reg regOp, const OpArg& arg); + void MAXSD(X64Reg regOp, const OpArg& arg); + void SQRTSS(X64Reg regOp, const OpArg& arg); + void SQRTSD(X64Reg regOp, const OpArg& arg); + void RCPSS(X64Reg regOp, const OpArg& arg); + void RSQRTSS(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Floating point bitwise (yes) - void CMPSS(X64Reg regOp, OpArg arg, u8 compare); - void CMPSD(X64Reg regOp, OpArg arg, u8 compare); + void 
CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); + void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); - inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } - inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } - inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } - inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } - inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } - inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } - inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } + void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } + void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } + void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } + void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } + void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } + void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } + void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) - void ADDPS(X64Reg regOp, OpArg arg); - void ADDPD(X64Reg regOp, OpArg arg); - void SUBPS(X64Reg regOp, OpArg arg); - void SUBPD(X64Reg regOp, OpArg arg); - void CMPPS(X64Reg regOp, OpArg arg, u8 compare); - void CMPPD(X64Reg regOp, OpArg arg, u8 compare); - void MULPS(X64Reg regOp, OpArg arg); - void MULPD(X64Reg regOp, OpArg arg); - void DIVPS(X64Reg regOp, OpArg arg); - void DIVPD(X64Reg regOp, OpArg arg); - void MINPS(X64Reg regOp, OpArg arg); - void MINPD(X64Reg regOp, OpArg arg); - void MAXPS(X64Reg regOp, OpArg arg); - void MAXPD(X64Reg regOp, OpArg arg); - void SQRTPS(X64Reg regOp, OpArg arg); - void SQRTPD(X64Reg regOp, OpArg arg); - void RCPPS(X64Reg regOp, OpArg arg); - void 
RSQRTPS(X64Reg regOp, OpArg arg); + void ADDPS(X64Reg regOp, const OpArg& arg); + void ADDPD(X64Reg regOp, const OpArg& arg); + void SUBPS(X64Reg regOp, const OpArg& arg); + void SUBPD(X64Reg regOp, const OpArg& arg); + void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); + void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); + void MULPS(X64Reg regOp, const OpArg& arg); + void MULPD(X64Reg regOp, const OpArg& arg); + void DIVPS(X64Reg regOp, const OpArg& arg); + void DIVPD(X64Reg regOp, const OpArg& arg); + void MINPS(X64Reg regOp, const OpArg& arg); + void MINPD(X64Reg regOp, const OpArg& arg); + void MAXPS(X64Reg regOp, const OpArg& arg); + void MAXPD(X64Reg regOp, const OpArg& arg); + void SQRTPS(X64Reg regOp, const OpArg& arg); + void SQRTPD(X64Reg regOp, const OpArg& arg); + void RCPPS(X64Reg regOp, const OpArg& arg); + void RSQRTPS(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) - void ANDPS(X64Reg regOp, OpArg arg); - void ANDPD(X64Reg regOp, OpArg arg); - void ANDNPS(X64Reg regOp, OpArg arg); - void ANDNPD(X64Reg regOp, OpArg arg); - void ORPS(X64Reg regOp, OpArg arg); - void ORPD(X64Reg regOp, OpArg arg); - void XORPS(X64Reg regOp, OpArg arg); - void XORPD(X64Reg regOp, OpArg arg); + void ANDPS(X64Reg regOp, const OpArg& arg); + void ANDPD(X64Reg regOp, const OpArg& arg); + void ANDNPS(X64Reg regOp, const OpArg& arg); + void ANDNPD(X64Reg regOp, const OpArg& arg); + void ORPS(X64Reg regOp, const OpArg& arg); + void ORPD(X64Reg regOp, const OpArg& arg); + void XORPS(X64Reg regOp, const OpArg& arg); + void XORPD(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. - void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); - void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); + void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); + void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); // SSE/SSE2: Useful alternative to shuffle in some cases. 
- void MOVDDUP(X64Reg regOp, OpArg arg); - - // TODO: Actually implement -#if 0 - // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... - void ADDSUBPS(X64Reg dest, OpArg src); - void ADDSUBPD(X64Reg dest, OpArg src); - void HADDPD(X64Reg dest, OpArg src); - void HSUBPS(X64Reg dest, OpArg src); - void HSUBPD(X64Reg dest, OpArg src); - - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPD(X64Reg dest, OpArg src, u8 arg); - - // These are probably useful for VFPU emulation. - void INSERTPS(X64Reg dest, OpArg src, u8 arg); - void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); -#endif + void MOVDDUP(X64Reg regOp, const OpArg& arg); // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. - void HADDPS(X64Reg dest, OpArg src); + void HADDPS(X64Reg dest, const OpArg& src); // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPS(X64Reg dest, OpArg src, u8 arg); + void DPPS(X64Reg dest, const OpArg& src, u8 arg); - void UNPCKLPS(X64Reg dest, OpArg src); - void UNPCKHPS(X64Reg dest, OpArg src); - void UNPCKLPD(X64Reg dest, OpArg src); - void UNPCKHPD(X64Reg dest, OpArg src); + void UNPCKLPS(X64Reg dest, const OpArg& src); + void UNPCKHPS(X64Reg dest, const OpArg& src); + void UNPCKLPD(X64Reg dest, const OpArg& src); + void UNPCKHPD(X64Reg dest, const OpArg& src); // SSE/SSE2: Compares. - void COMISS(X64Reg regOp, OpArg arg); - void COMISD(X64Reg regOp, OpArg arg); - void UCOMISS(X64Reg regOp, OpArg arg); - void UCOMISD(X64Reg regOp, OpArg arg); + void COMISS(X64Reg regOp, const OpArg& arg); + void COMISD(X64Reg regOp, const OpArg& arg); + void UCOMISS(X64Reg regOp, const OpArg& arg); + void UCOMISD(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Moves. 
Use the right data type for your data, in most cases. - void MOVAPS(X64Reg regOp, OpArg arg); - void MOVAPD(X64Reg regOp, OpArg arg); - void MOVAPS(OpArg arg, X64Reg regOp); - void MOVAPD(OpArg arg, X64Reg regOp); - - void MOVUPS(X64Reg regOp, OpArg arg); - void MOVUPD(X64Reg regOp, OpArg arg); - void MOVUPS(OpArg arg, X64Reg regOp); - void MOVUPD(OpArg arg, X64Reg regOp); - - void MOVDQA(X64Reg regOp, OpArg arg); - void MOVDQA(OpArg arg, X64Reg regOp); - void MOVDQU(X64Reg regOp, OpArg arg); - void MOVDQU(OpArg arg, X64Reg regOp); - - void MOVSS(X64Reg regOp, OpArg arg); - void MOVSD(X64Reg regOp, OpArg arg); - void MOVSS(OpArg arg, X64Reg regOp); - void MOVSD(OpArg arg, X64Reg regOp); - - void MOVLPS(X64Reg regOp, OpArg arg); - void MOVLPD(X64Reg regOp, OpArg arg); - void MOVLPS(OpArg arg, X64Reg regOp); - void MOVLPD(OpArg arg, X64Reg regOp); - - void MOVHPS(X64Reg regOp, OpArg arg); - void MOVHPD(X64Reg regOp, OpArg arg); - void MOVHPS(OpArg arg, X64Reg regOp); - void MOVHPD(OpArg arg, X64Reg regOp); + void MOVAPS(X64Reg regOp, const OpArg& arg); + void MOVAPD(X64Reg regOp, const OpArg& arg); + void MOVAPS(const OpArg& arg, X64Reg regOp); + void MOVAPD(const OpArg& arg, X64Reg regOp); + + void MOVUPS(X64Reg regOp, const OpArg& arg); + void MOVUPD(X64Reg regOp, const OpArg& arg); + void MOVUPS(const OpArg& arg, X64Reg regOp); + void MOVUPD(const OpArg& arg, X64Reg regOp); + + void MOVDQA(X64Reg regOp, const OpArg& arg); + void MOVDQA(const OpArg& arg, X64Reg regOp); + void MOVDQU(X64Reg regOp, const OpArg& arg); + void MOVDQU(const OpArg& arg, X64Reg regOp); + + void MOVSS(X64Reg regOp, const OpArg& arg); + void MOVSD(X64Reg regOp, const OpArg& arg); + void MOVSS(const OpArg& arg, X64Reg regOp); + void MOVSD(const OpArg& arg, X64Reg regOp); + + void MOVLPS(X64Reg regOp, const OpArg& arg); + void MOVLPD(X64Reg regOp, const OpArg& arg); + void MOVLPS(const OpArg& arg, X64Reg regOp); + void MOVLPD(const OpArg& arg, X64Reg regOp); + + void MOVHPS(X64Reg regOp, const 
OpArg& arg); + void MOVHPD(X64Reg regOp, const OpArg& arg); + void MOVHPS(const OpArg& arg, X64Reg regOp); + void MOVHPD(const OpArg& arg, X64Reg regOp); void MOVHLPS(X64Reg regOp1, X64Reg regOp2); void MOVLHPS(X64Reg regOp1, X64Reg regOp2); - void MOVD_xmm(X64Reg dest, const OpArg &arg); + void MOVD_xmm(X64Reg dest, const OpArg& arg); void MOVQ_xmm(X64Reg dest, OpArg arg); - void MOVD_xmm(const OpArg &arg, X64Reg src); + void MOVD_xmm(const OpArg& arg, X64Reg src); void MOVQ_xmm(OpArg arg, X64Reg src); // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. - void MOVMSKPS(X64Reg dest, OpArg arg); - void MOVMSKPD(X64Reg dest, OpArg arg); + void MOVMSKPS(X64Reg dest, const OpArg& arg); + void MOVMSKPD(X64Reg dest, const OpArg& arg); // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. void MASKMOVDQU(X64Reg dest, X64Reg src); - void LDDQU(X64Reg dest, OpArg src); + void LDDQU(X64Reg dest, const OpArg& src); // SSE/SSE2: Data type conversions. 
- void CVTPS2PD(X64Reg dest, OpArg src); - void CVTPD2PS(X64Reg dest, OpArg src); - void CVTSS2SD(X64Reg dest, OpArg src); - void CVTSI2SS(X64Reg dest, OpArg src); - void CVTSD2SS(X64Reg dest, OpArg src); - void CVTSI2SD(X64Reg dest, OpArg src); - void CVTDQ2PD(X64Reg regOp, OpArg arg); - void CVTPD2DQ(X64Reg regOp, OpArg arg); - void CVTDQ2PS(X64Reg regOp, OpArg arg); - void CVTPS2DQ(X64Reg regOp, OpArg arg); - - void CVTTPS2DQ(X64Reg regOp, OpArg arg); - void CVTTPD2DQ(X64Reg regOp, OpArg arg); + void CVTPS2PD(X64Reg dest, const OpArg& src); + void CVTPD2PS(X64Reg dest, const OpArg& src); + void CVTSS2SD(X64Reg dest, const OpArg& src); + void CVTSI2SS(X64Reg dest, const OpArg& src); + void CVTSD2SS(X64Reg dest, const OpArg& src); + void CVTSI2SD(X64Reg dest, const OpArg& src); + void CVTDQ2PD(X64Reg regOp, const OpArg& arg); + void CVTPD2DQ(X64Reg regOp, const OpArg& arg); + void CVTDQ2PS(X64Reg regOp, const OpArg& arg); + void CVTPS2DQ(X64Reg regOp, const OpArg& arg); + + void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); + void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); // Destinations are X64 regs (rax, rbx, ...) for these instructions. 
- void CVTSS2SI(X64Reg xregdest, OpArg src); - void CVTSD2SI(X64Reg xregdest, OpArg src); - void CVTTSS2SI(X64Reg xregdest, OpArg arg); - void CVTTSD2SI(X64Reg xregdest, OpArg arg); + void CVTSS2SI(X64Reg xregdest, const OpArg& src); + void CVTSD2SI(X64Reg xregdest, const OpArg& src); + void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); + void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); // SSE2: Packed integer instructions - void PACKSSDW(X64Reg dest, OpArg arg); - void PACKSSWB(X64Reg dest, OpArg arg); - void PACKUSDW(X64Reg dest, OpArg arg); - void PACKUSWB(X64Reg dest, OpArg arg); + void PACKSSDW(X64Reg dest, const OpArg& arg); + void PACKSSWB(X64Reg dest, const OpArg& arg); + void PACKUSDW(X64Reg dest, const OpArg& arg); + void PACKUSWB(X64Reg dest, const OpArg& arg); void PUNPCKLBW(X64Reg dest, const OpArg &arg); void PUNPCKLWD(X64Reg dest, const OpArg &arg); void PUNPCKLDQ(X64Reg dest, const OpArg &arg); void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); - void PTEST(X64Reg dest, OpArg arg); - void PAND(X64Reg dest, OpArg arg); - void PANDN(X64Reg dest, OpArg arg); - void PXOR(X64Reg dest, OpArg arg); - void POR(X64Reg dest, OpArg arg); - - void PADDB(X64Reg dest, OpArg arg); - void PADDW(X64Reg dest, OpArg arg); - void PADDD(X64Reg dest, OpArg arg); - void PADDQ(X64Reg dest, OpArg arg); - - void PADDSB(X64Reg dest, OpArg arg); - void PADDSW(X64Reg dest, OpArg arg); - void PADDUSB(X64Reg dest, OpArg arg); - void PADDUSW(X64Reg dest, OpArg arg); - - void PSUBB(X64Reg dest, OpArg arg); - void PSUBW(X64Reg dest, OpArg arg); - void PSUBD(X64Reg dest, OpArg arg); - void PSUBQ(X64Reg dest, OpArg arg); - - void PSUBSB(X64Reg dest, OpArg arg); - void PSUBSW(X64Reg dest, OpArg arg); - void PSUBUSB(X64Reg dest, OpArg arg); - void PSUBUSW(X64Reg dest, OpArg arg); - - void PAVGB(X64Reg dest, OpArg arg); - void PAVGW(X64Reg dest, OpArg arg); - - void PCMPEQB(X64Reg dest, OpArg arg); - void PCMPEQW(X64Reg dest, OpArg arg); - void PCMPEQD(X64Reg dest, OpArg arg); - - void 
PCMPGTB(X64Reg dest, OpArg arg); - void PCMPGTW(X64Reg dest, OpArg arg); - void PCMPGTD(X64Reg dest, OpArg arg); - - void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); - void PINSRW(X64Reg dest, OpArg arg, u8 subreg); - - void PMADDWD(X64Reg dest, OpArg arg); - void PSADBW(X64Reg dest, OpArg arg); - - void PMAXSW(X64Reg dest, OpArg arg); - void PMAXUB(X64Reg dest, OpArg arg); - void PMINSW(X64Reg dest, OpArg arg); - void PMINUB(X64Reg dest, OpArg arg); + void PTEST(X64Reg dest, const OpArg& arg); + void PAND(X64Reg dest, const OpArg& arg); + void PANDN(X64Reg dest, const OpArg& arg); + void PXOR(X64Reg dest, const OpArg& arg); + void POR(X64Reg dest, const OpArg& arg); + + void PADDB(X64Reg dest, const OpArg& arg); + void PADDW(X64Reg dest, const OpArg& arg); + void PADDD(X64Reg dest, const OpArg& arg); + void PADDQ(X64Reg dest, const OpArg& arg); + + void PADDSB(X64Reg dest, const OpArg& arg); + void PADDSW(X64Reg dest, const OpArg& arg); + void PADDUSB(X64Reg dest, const OpArg& arg); + void PADDUSW(X64Reg dest, const OpArg& arg); + + void PSUBB(X64Reg dest, const OpArg& arg); + void PSUBW(X64Reg dest, const OpArg& arg); + void PSUBD(X64Reg dest, const OpArg& arg); + void PSUBQ(X64Reg dest, const OpArg& arg); + + void PSUBSB(X64Reg dest, const OpArg& arg); + void PSUBSW(X64Reg dest, const OpArg& arg); + void PSUBUSB(X64Reg dest, const OpArg& arg); + void PSUBUSW(X64Reg dest, const OpArg& arg); + + void PAVGB(X64Reg dest, const OpArg& arg); + void PAVGW(X64Reg dest, const OpArg& arg); + + void PCMPEQB(X64Reg dest, const OpArg& arg); + void PCMPEQW(X64Reg dest, const OpArg& arg); + void PCMPEQD(X64Reg dest, const OpArg& arg); + + void PCMPGTB(X64Reg dest, const OpArg& arg); + void PCMPGTW(X64Reg dest, const OpArg& arg); + void PCMPGTD(X64Reg dest, const OpArg& arg); + + void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); + void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); + + void PMADDWD(X64Reg dest, const OpArg& arg); + void PSADBW(X64Reg dest, const OpArg& 
arg); + + void PMAXSW(X64Reg dest, const OpArg& arg); + void PMAXUB(X64Reg dest, const OpArg& arg); + void PMINSW(X64Reg dest, const OpArg& arg); + void PMINUB(X64Reg dest, const OpArg& arg); // SSE4: More MAX/MIN instructions. - void PMINSB(X64Reg dest, OpArg arg); - void PMINSD(X64Reg dest, OpArg arg); - void PMINUW(X64Reg dest, OpArg arg); - void PMINUD(X64Reg dest, OpArg arg); - void PMAXSB(X64Reg dest, OpArg arg); - void PMAXSD(X64Reg dest, OpArg arg); - void PMAXUW(X64Reg dest, OpArg arg); - void PMAXUD(X64Reg dest, OpArg arg); - - void PMOVMSKB(X64Reg dest, OpArg arg); - void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFB(X64Reg dest, OpArg arg); - - void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); + void PMINSB(X64Reg dest, const OpArg& arg); + void PMINSD(X64Reg dest, const OpArg& arg); + void PMINUW(X64Reg dest, const OpArg& arg); + void PMINUD(X64Reg dest, const OpArg& arg); + void PMAXSB(X64Reg dest, const OpArg& arg); + void PMAXSD(X64Reg dest, const OpArg& arg); + void PMAXUW(X64Reg dest, const OpArg& arg); + void PMAXUD(X64Reg dest, const OpArg& arg); + + void PMOVMSKB(X64Reg dest, const OpArg& arg); + void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); + void PSHUFB(X64Reg dest, const OpArg& arg); + + void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); + void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); void PSRLW(X64Reg reg, int shift); void PSRLD(X64Reg reg, int shift); void PSRLQ(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, OpArg arg); + void PSRLQ(X64Reg reg, const OpArg& arg); void PSRLDQ(X64Reg reg, int shift); void PSLLW(X64Reg reg, int shift); @@ -833,198 +813,198 @@ public: void PSRAD(X64Reg reg, int shift); // SSE4: data type conversions - void PMOVSXBW(X64Reg dest, OpArg arg); - void PMOVSXBD(X64Reg dest, OpArg arg); - void PMOVSXBQ(X64Reg dest, OpArg arg); - void PMOVSXWD(X64Reg dest, OpArg arg); - void PMOVSXWQ(X64Reg dest, OpArg arg); - void 
PMOVSXDQ(X64Reg dest, OpArg arg); - void PMOVZXBW(X64Reg dest, OpArg arg); - void PMOVZXBD(X64Reg dest, OpArg arg); - void PMOVZXBQ(X64Reg dest, OpArg arg); - void PMOVZXWD(X64Reg dest, OpArg arg); - void PMOVZXWQ(X64Reg dest, OpArg arg); - void PMOVZXDQ(X64Reg dest, OpArg arg); + void PMOVSXBW(X64Reg dest, const OpArg& arg); + void PMOVSXBD(X64Reg dest, const OpArg& arg); + void PMOVSXBQ(X64Reg dest, const OpArg& arg); + void PMOVSXWD(X64Reg dest, const OpArg& arg); + void PMOVSXWQ(X64Reg dest, const OpArg& arg); + void PMOVSXDQ(X64Reg dest, const OpArg& arg); + void PMOVZXBW(X64Reg dest, const OpArg& arg); + void PMOVZXBD(X64Reg dest, const OpArg& arg); + void PMOVZXBQ(X64Reg dest, const OpArg& arg); + void PMOVZXWD(X64Reg dest, const OpArg& arg); + void PMOVZXWQ(X64Reg dest, const OpArg& arg); + void PMOVZXDQ(X64Reg dest, const OpArg& arg); // SSE4: variable blend instructions (xmm0 implicit argument) - void PBLENDVB(X64Reg dest, OpArg arg); - void BLENDVPS(X64Reg dest, OpArg arg); - void BLENDVPD(X64Reg dest, OpArg arg); + void PBLENDVB(X64Reg dest, const OpArg& arg); + void BLENDVPS(X64Reg dest, const OpArg& arg); + void BLENDVPD(X64Reg dest, const OpArg& arg); void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) 
- void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); - - inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } + void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); + + void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, 
arg, FROUND_FLOOR); } + void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } + void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } + + void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } + void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } + void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } + + void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } + void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } + void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } + + void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } + void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } + void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } // AVX - void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); - void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VANDPS(X64Reg regOp1, X64Reg 
regOp2, OpArg arg); - void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); + void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + + void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + + void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void 
VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); // FMA3 - void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PD(X64Reg regOp1, 
X64Reg regOp2, OpArg arg); - void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void 
VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132SS(X64Reg 
regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + 
void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); // VEX GPR instructions - void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); - void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void BLSR(int bits, X64Reg regOp, OpArg arg); - void BLSMSK(int bits, X64Reg regOp, OpArg arg); - void BLSI(int bits, X64Reg regOp, OpArg arg); - void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); + void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void BLSR(int bits, X64Reg regOp, const OpArg& arg); + void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); + void BLSI(int bits, X64Reg regOp, const OpArg& arg); + void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); void RDTSC(); // Utility functions // The difference between this and CALL is that this aligns the stack // where appropriate. 
- void ABI_CallFunction(const void *func); + void ABI_CallFunction(const void* func); template <typename T> void ABI_CallFunction(T (*func)()) { - ABI_CallFunction((const void *)func); + ABI_CallFunction((const void*)func); } - void ABI_CallFunction(const u8 *func) { - ABI_CallFunction((const void *)func); + void ABI_CallFunction(const u8* func) { + ABI_CallFunction((const void*)func); } - void ABI_CallFunctionC16(const void *func, u16 param1); - void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); + void ABI_CallFunctionC16(const void* func, u16 param1); + void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); // These only support u32 parameters, but that's enough for a lot of uses. // These will destroy the 1 or 2 first "parameter regs". - void ABI_CallFunctionC(const void *func, u32 param1); - void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); - void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); - void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); - void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); - void ABI_CallFunctionP(const void *func, void *param1); - void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); - void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); - void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); - void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); - void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); - void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); - void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); + void ABI_CallFunctionC(const void* func, u32 param1); + void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); + void ABI_CallFunctionCCC(const 
void* func, u32 param1, u32 param2, u32 param3); + void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); + void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); + void ABI_CallFunctionP(const void* func, void* param1); + void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); + void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); + void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); + void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); + void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); + void ABI_CallFunctionA(const void* func, const OpArg& arg1); + void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); // Pass a register as a parameter. - void ABI_CallFunctionR(const void *func, X64Reg reg1); - void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); + void ABI_CallFunctionR(const void* func, X64Reg reg1); + void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); template <typename Tr, typename T1> void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { - ABI_CallFunctionC((const void *)func, param1); + ABI_CallFunctionC((const void*)func, param1); } // A function that doesn't have any control over what it will do to regs, @@ -1048,9 +1028,9 @@ public: void ABI_EmitEpilogue(int maxCallParams); #ifdef _M_IX86 - inline int ABI_GetNumXMMRegs() { return 8; } + static int ABI_GetNumXMMRegs() { return 8; } #else - inline int ABI_GetNumXMMRegs() { return 16; } + static int ABI_GetNumXMMRegs() { return 16; } #endif }; // class XEmitter diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6cc60fd58..c17290b9b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -29,6 +29,7 @@ set(SRCS hle/kernel/address_arbiter.cpp hle/kernel/event.cpp hle/kernel/kernel.cpp + 
hle/kernel/memory.cpp hle/kernel/mutex.cpp hle/kernel/process.cpp hle/kernel/resource_limit.cpp @@ -115,7 +116,6 @@ set(SRCS loader/loader.cpp loader/ncch.cpp tracer/recorder.cpp - mem_map.cpp memory.cpp settings.cpp system.cpp @@ -157,6 +157,7 @@ set(HEADERS hle/kernel/address_arbiter.h hle/kernel/event.h hle/kernel/kernel.h + hle/kernel/memory.h hle/kernel/mutex.h hle/kernel/process.h hle/kernel/resource_limit.h @@ -245,7 +246,6 @@ set(HEADERS loader/ncch.h tracer/recorder.h tracer/citrace.h - mem_map.h memory.h memory_setup.h settings.h diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 422e80b50..0fddb07a0 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "core/memory.h" @@ -48,65 +49,47 @@ enum { typedef unsigned int (*shtop_fp_t)(ARMul_State* cpu, unsigned int sht_oper); -static int CondPassed(ARMul_State* cpu, unsigned int cond) { - const u32 NFLAG = cpu->NFlag; - const u32 ZFLAG = cpu->ZFlag; - const u32 CFLAG = cpu->CFlag; - const u32 VFLAG = cpu->VFlag; - - int temp = 0; +static bool CondPassed(ARMul_State* cpu, unsigned int cond) { + const bool n_flag = cpu->NFlag != 0; + const bool z_flag = cpu->ZFlag != 0; + const bool c_flag = cpu->CFlag != 0; + const bool v_flag = cpu->VFlag != 0; switch (cond) { - case 0x0: - temp = ZFLAG; - break; - case 0x1: // NE - temp = !ZFLAG; - break; - case 0x2: // CS - temp = CFLAG; - break; - case 0x3: // CC - temp = !CFLAG; - break; - case 0x4: // MI - temp = NFLAG; - break; - case 0x5: // PL - temp = !NFLAG; - break; - case 0x6: // VS - temp = VFLAG; - break; - case 0x7: // VC - temp = !VFLAG; - break; - case 0x8: // HI - temp = (CFLAG && !ZFLAG); - break; - case 0x9: // LS - temp = (!CFLAG || ZFLAG); - break; - case 0xa: // GE - temp = ((!NFLAG 
&& !VFLAG) || (NFLAG && VFLAG)); - break; - case 0xb: // LT - temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG)); - break; - case 0xc: // GT - temp = ((!NFLAG && !VFLAG && !ZFLAG) || (NFLAG && VFLAG && !ZFLAG)); - break; - case 0xd: // LE - temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG)) || ZFLAG; - break; - case 0xe: // AL - temp = 1; - break; - case 0xf: - temp = 1; - break; - } - return temp; + case ConditionCode::EQ: + return z_flag; + case ConditionCode::NE: + return !z_flag; + case ConditionCode::CS: + return c_flag; + case ConditionCode::CC: + return !c_flag; + case ConditionCode::MI: + return n_flag; + case ConditionCode::PL: + return !n_flag; + case ConditionCode::VS: + return v_flag; + case ConditionCode::VC: + return !v_flag; + case ConditionCode::HI: + return (c_flag && !z_flag); + case ConditionCode::LS: + return (!c_flag || z_flag); + case ConditionCode::GE: + return (n_flag == v_flag); + case ConditionCode::LT: + return (n_flag != v_flag); + case ConditionCode::GT: + return (!z_flag && (n_flag == v_flag)); + case ConditionCode::LE: + return (z_flag || (n_flag != v_flag)); + case ConditionCode::AL: + case ConditionCode::NV: // Unconditional + return true; + } + + return false; } static unsigned int DPO(Immediate)(ARMul_State* cpu, unsigned int sht_oper) { @@ -3522,8 +3505,11 @@ enum { FETCH_EXCEPTION }; +MICROPROFILE_DEFINE(DynCom_Decode, "DynCom", "Decode", MP_RGB(255, 64, 64)); + static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, u32 addr) { Common::Profiling::ScopeTimer timer_decode(profile_decode); + MICROPROFILE_SCOPE(DynCom_Decode); // Decode instruction, get index // Allocate memory and init InsCream @@ -3588,8 +3574,11 @@ static int clz(unsigned int x) { return n; } +MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); + unsigned InterpreterMainLoop(ARMul_State* cpu) { Common::Profiling::ScopeTimer timer_execute(profile_execute); + MICROPROFILE_SCOPE(DynCom_Execute); #undef RM #undef RS diff --git 
a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp index ccb2eb0eb..0491717dc 100644 --- a/src/core/arm/skyeye_common/armstate.cpp +++ b/src/core/arm/skyeye_common/armstate.cpp @@ -4,7 +4,6 @@ #include "common/swap.h" #include "common/logging/log.h" -#include "core/mem_map.h" #include "core/memory.h" #include "core/arm/skyeye_common/armstate.h" #include "core/arm/skyeye_common/vfp/vfp.h" diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp index d31fb9449..883713e86 100644 --- a/src/core/arm/skyeye_common/armsupp.cpp +++ b/src/core/arm/skyeye_common/armsupp.cpp @@ -17,7 +17,6 @@ #include "common/logging/log.h" -#include "core/mem_map.h" #include "core/arm/skyeye_common/arm_regformat.h" #include "core/arm/skyeye_common/armstate.h" #include "core/arm/skyeye_common/armsupp.h" diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp index aea936d2d..b1a72dc0c 100644 --- a/src/core/hle/config_mem.cpp +++ b/src/core/hle/config_mem.cpp @@ -25,10 +25,6 @@ void Init() { config_mem.sys_core_ver = 0x2; config_mem.unit_info = 0x1; // Bit 0 set for Retail config_mem.prev_firm = 0; - config_mem.app_mem_type = 0x2; // Default app mem type is 0 - config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB) - config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB - config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc); config_mem.firm_unk = 0; config_mem.firm_version_rev = 0; config_mem.firm_version_min = 0x40; @@ -36,7 +32,4 @@ void Init() { config_mem.firm_sys_core_ver = 0x2; } -void Shutdown() { -} - } // namespace diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h index 9825a09e8..24a1254f2 100644 --- a/src/core/hle/config_mem.h +++ b/src/core/hle/config_mem.h @@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory extern 
ConfigMemDef config_mem; void Init(); -void Shutdown(); } // namespace diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h index 1a0518926..5846a161b 100644 --- a/src/core/hle/function_wrappers.h +++ b/src/core/hle/function_wrappers.h @@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() { FuncReturn(func(PARAM(0), param1, param2).raw); } +template<ResultCode func(s64*, Handle, u32)> void Wrap() { + s64 param_1 = 0; + u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw; + Core::g_app_core->SetReg(1, (u32)param_1); + Core::g_app_core->SetReg(2, (u32)(param_1 >> 32)); + FuncReturn(retval); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Function wrappers that return type u32 diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index cd0a400dc..331b1b22a 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -34,8 +34,6 @@ void Reschedule(const char *reason) { void Init() { Service::Init(); - ConfigMem::Init(); - SharedPage::Init(); g_reschedule = false; @@ -43,8 +41,6 @@ void Init() { } void Shutdown() { - ConfigMem::Shutdown(); - SharedPage::Shutdown(); Service::Shutdown(); LOG_DEBUG(Kernel, "shutdown OK"); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 5711c0405..7a401a965 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -7,11 +7,14 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "core/hle/config_mem.h" #include "core/hle/kernel/kernel.h" -#include "core/hle/kernel/resource_limit.h" +#include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" +#include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/timer.h" +#include "core/hle/shared_page.h" namespace Kernel { @@ -119,6 +122,13 @@ void HandleTable::Clear() { /// Initialize the kernel void Init() { + ConfigMem::Init(); + SharedPage::Init(); + +
// TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead + // For now it defaults to the one with the largest allocation to the app + Kernel::MemoryInit(2); // Allocates 96MB to the application + Kernel::ResourceLimitsInit(); Kernel::ThreadingInit(); Kernel::TimersInit(); @@ -131,11 +141,14 @@ void Init() { /// Shutdown the kernel void Shutdown() { + g_handle_table.Clear(); // Free all kernel objects + Kernel::ThreadingShutdown(); + g_current_process = nullptr; + Kernel::TimersShutdown(); Kernel::ResourceLimitsShutdown(); - g_handle_table.Clear(); // Free all kernel objects - g_current_process = nullptr; + Kernel::MemoryShutdown(); } } // namespace diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp new file mode 100644 index 000000000..e4fc5f3c4 --- /dev/null +++ b/src/core/hle/kernel/memory.cpp @@ -0,0 +1,136 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <map> +#include <memory> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "common/logging/log.h" + +#include "core/hle/config_mem.h" +#include "core/hle/kernel/memory.h" +#include "core/hle/kernel/vm_manager.h" +#include "core/hle/result.h" +#include "core/hle/shared_page.h" +#include "core/memory.h" +#include "core/memory_setup.h" + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace Kernel { + +static MemoryRegionInfo memory_regions[3]; + +/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system +/// memory configuration type. +static const u32 memory_region_sizes[8][3] = { + // Old 3DS layouts + {0x04000000, 0x02C00000, 0x01400000}, // 0 + { /* This appears to be unused.
*/ }, // 1 + {0x06000000, 0x00C00000, 0x01400000}, // 2 + {0x05000000, 0x01C00000, 0x01400000}, // 3 + {0x04800000, 0x02400000, 0x01400000}, // 4 + {0x02000000, 0x04C00000, 0x01400000}, // 5 + + // New 3DS layouts + {0x07C00000, 0x06400000, 0x02000000}, // 6 + {0x0B200000, 0x02E00000, 0x02000000}, // 7 +}; + +void MemoryInit(u32 mem_type) { + // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead. + ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!"); + ASSERT(mem_type != 1); + + // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with + // the sizes specified in the memory_region_sizes table. + VAddr base = 0; + for (int i = 0; i < 3; ++i) { + memory_regions[i].base = base; + memory_regions[i].size = memory_region_sizes[mem_type][i]; + memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); + + base += memory_regions[i].size; + } + + // We must've allocated the entire FCRAM by the end + ASSERT(base == Memory::FCRAM_SIZE); + + using ConfigMem::config_mem; + config_mem.app_mem_type = mem_type; + // app_mem_alloc does not always match the configured size for memory_regions[0]: in case the + // n3DS type override is in effect it reports the size the game expects, not the real one.
+ config_mem.app_mem_alloc = memory_region_sizes[mem_type][0]; + config_mem.sys_mem_alloc = memory_regions[1].size; + config_mem.base_mem_alloc = memory_regions[2].size; +} + +void MemoryShutdown() { + for (auto& region : memory_regions) { + region.base = 0; + region.size = 0; + region.linear_heap_memory = nullptr; + } +} + +MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) { + switch (region) { + case MemoryRegion::APPLICATION: + return &memory_regions[0]; + case MemoryRegion::SYSTEM: + return &memory_regions[1]; + case MemoryRegion::BASE: + return &memory_regions[2]; + default: + UNREACHABLE(); + } +} + +} + +namespace Memory { + +namespace { + +struct MemoryArea { + u32 base; + u32 size; + const char* name; +}; + +// We don't declare the IO regions in here since they are handled by other means. +static MemoryArea memory_areas[] = { + {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory + {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) + {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory + {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory +}; + +} + +void Init() { + InitMemoryMap(); + LOG_DEBUG(HW_Memory, "initialized OK"); +} + +void InitLegacyAddressSpace(Kernel::VMManager& address_space) { + using namespace Kernel; + + for (MemoryArea& area : memory_areas) { + auto block = std::make_shared<std::vector<u8>>(area.size); + address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); + } + + auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, + (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); + address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); + + auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, + (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); + address_space.Reprotect(shared_page_vma, VMAPermission::Read); +} + +} // namespace diff --git
a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h new file mode 100644 index 000000000..36690b091 --- /dev/null +++ b/src/core/hle/kernel/memory.h @@ -0,0 +1,35 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> + +#include "common/common_types.h" + +#include "core/hle/kernel/process.h" + +namespace Kernel { + +class VMManager; + +struct MemoryRegionInfo { + u32 base; // Not an address, but offset from start of FCRAM + u32 size; + + std::shared_ptr<std::vector<u8>> linear_heap_memory; +}; + +void MemoryInit(u32 mem_type); +void MemoryShutdown(); +MemoryRegionInfo* GetMemoryRegion(MemoryRegion region); + +} + +namespace Memory { + +void Init(); +void InitLegacyAddressSpace(Kernel::VMManager& address_space); + +} // namespace diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index a7892c652..c2b4963d4 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -7,11 +7,11 @@ #include "common/logging/log.h" #include "common/make_unique.h" +#include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/vm_manager.h" -#include "core/mem_map.h" #include "core/memory.h" namespace Kernel { @@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) { process->codeset = std::move(code_set); process->flags.raw = 0; process->flags.memory_region = MemoryRegion::APPLICATION; - process->address_space = Common::make_unique<VMManager>(); - Memory::InitLegacyAddressSpace(*process->address_space); + Memory::InitLegacyAddressSpace(process->vm_manager); return process; } @@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { mapping.unk_flag = false; } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF // Kernel version - int minor = 
descriptor & 0xFF; - int major = (descriptor >> 8) & 0xFF; - LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor); + kernel_version = descriptor & 0xFFFF; + + int minor = kernel_version & 0xFF; + int major = (kernel_version >> 8) & 0xFF; + LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor); } else { LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor); } @@ -103,20 +104,161 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { } void Process::Run(s32 main_thread_priority, u32 stack_size) { + memory_region = GetMemoryRegion(flags.memory_region); + auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { - auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory, + auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory, segment.offset, segment.size, memory_state).Unwrap(); - address_space->Reprotect(vma, permissions); + vm_manager.Reprotect(vma, permissions); + misc_memory_used += segment.size; }; + // Map CodeSet segments MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code); MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); - address_space->LogLayout(); + // Allocate and map stack + vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size, + std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked + ).Unwrap(); + misc_memory_used += stack_size; + + vm_manager.LogLayout(Log::Level::Debug); Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); } +VAddr Process::GetLinearHeapBase() const { + return (kernel_version < 0x22C ? 
Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_VADDR) + + memory_region->base; +} + +VAddr Process::GetLinearHeapLimit() const { + return GetLinearHeapBase() + memory_region->size; +} + +ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) { + if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { + return ERR_INVALID_ADDRESS; + } + + if (heap_memory == nullptr) { + // Initialize heap + heap_memory = std::make_shared<std::vector<u8>>(); + heap_start = heap_end = target; + } + + // If necessary, expand backing vector to cover new heap extents. + if (target < heap_start) { + heap_memory->insert(begin(*heap_memory), heap_start - target, 0); + heap_start = target; + vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); + } + if (target + size > heap_end) { + heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0); + heap_end = target + size; + vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); + } + ASSERT(heap_end - heap_start == heap_memory->size()); + + CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private)); + vm_manager.Reprotect(vma, perms); + + heap_used += size; + + return MakeResult<VAddr>(heap_end - size); +} + +ResultCode Process::HeapFree(VAddr target, u32 size) { + if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { + return ERR_INVALID_ADDRESS; + } + + if (size == 0) { + return RESULT_SUCCESS; + } + + ResultCode result = vm_manager.UnmapRange(target, size); + if (result.IsError()) return result; + + heap_used -= size; + + return RESULT_SUCCESS; +} + +ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) { + auto& linheap_memory = memory_region->linear_heap_memory; + + VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size(); + // Games and homebrew only ever seem to pass 0 here (which lets 
the kernel decide the address), + // but explicit addresses are also accepted and respected. + if (target == 0) { + target = heap_end; + } + + if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || + target > heap_end || target + size < target) { + + return ERR_INVALID_ADDRESS; + } + + // Expansion of the linear heap is only allowed if you do an allocation immediately at its + // end. It's possible to free gaps in the middle of the heap and then reallocate them later, + // but expansions are only allowed at the end. + if (target == heap_end) { + linheap_memory->insert(linheap_memory->end(), size, 0); + vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); + } + + // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the + // same region. It is unknown if or how the 3DS kernel checks against this. + size_t offset = target - GetLinearHeapBase(); + CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous)); + vm_manager.Reprotect(vma, perms); + + linear_heap_used += size; + + return MakeResult<VAddr>(target); +} + +ResultCode Process::LinearFree(VAddr target, u32 size) { + auto& linheap_memory = memory_region->linear_heap_memory; + + if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || + target + size < target) { + + return ERR_INVALID_ADDRESS; + } + + if (size == 0) { + return RESULT_SUCCESS; + } + + VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size(); + if (target + size > heap_end) { + return ERR_INVALID_ADDRESS_STATE; + } + + ResultCode result = vm_manager.UnmapRange(target, size); + if (result.IsError()) return result; + + linear_heap_used -= size; + + if (target + size == heap_end) { + // End of linear heap has been freed, so check what's the last allocated block in it and + // reduce the size.
+ auto vma = vm_manager.FindVMA(target); + ASSERT(vma != vm_manager.vma_map.end()); + ASSERT(vma->second.type == VMAType::Free); + VAddr new_end = vma->second.base; + if (new_end >= GetLinearHeapBase()) { + linheap_memory->resize(new_end - GetLinearHeapBase()); + } + } + + return RESULT_SUCCESS; +} + Kernel::Process::Process() {} Kernel::Process::~Process() {} diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83d3aceae..60e17f251 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -15,6 +15,7 @@ #include "common/common_types.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/vm_manager.h" namespace Kernel { @@ -48,7 +49,7 @@ union ProcessFlags { }; class ResourceLimit; -class VMManager; +struct MemoryRegionInfo; struct CodeSet final : public Object { static SharedPtr<CodeSet> Create(std::string name, u64 program_id); @@ -104,14 +105,12 @@ public: /// processes access to specific I/O regions and device memory. boost::container::static_vector<AddressMapping, 8> address_mappings; ProcessFlags flags; + /// Kernel compatibility version for this process + u16 kernel_version = 0; /// The id of this process u32 process_id = next_process_id++; - /// Bitmask of the used TLS slots - std::bitset<300> used_tls_slots; - std::unique_ptr<VMManager> address_space; - /** * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them * to this process. @@ -123,6 +122,36 @@ public: */ void Run(s32 main_thread_priority, u32 stack_size); + + /////////////////////////////////////////////////////////////////////////////////////////////// + // Memory Management + + VMManager vm_manager; + + // Memory used to back the allocations in the regular heap. A single vector is used to cover + // the entire virtual address space extents that bound the allocations, including any holes. 
+ // This makes deallocation and reallocation of holes fast and keeps process memory contiguous + // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. + std::shared_ptr<std::vector<u8>> heap_memory; + // The left/right bounds of the address space covered by heap_memory. + VAddr heap_start = 0, heap_end = 0; + + u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0; + + MemoryRegionInfo* memory_region = nullptr; + + /// Bitmask of the used TLS slots + std::bitset<300> used_tls_slots; + + VAddr GetLinearHeapBase() const; + VAddr GetLinearHeapLimit() const; + + ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms); + ResultCode HeapFree(VAddr target, u32 size); + + ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms); + ResultCode LinearFree(VAddr target, u32 size); + private: Process(); ~Process() override; diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index 94b3e3298..67dde08c2 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp @@ -6,7 +6,6 @@ #include "common/logging/log.h" -#include "core/mem_map.h" #include "core/hle/kernel/resource_limit.h" namespace Kernel { diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index 4137683b5..1f477664b 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp @@ -20,6 +20,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi shared_memory->name = std::move(name); shared_memory->base_address = 0x0; + shared_memory->fixed_address = 0x0; shared_memory->size = size; shared_memory->permissions = permissions; shared_memory->other_permissions = other_permissions; @@ -30,9 +31,31 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions, MemoryPermission 
other_permissions) { + if (base_address != 0) { + LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!", + GetObjectId(), address, name.c_str(), base_address); + // TODO: Verify error code with hardware + return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, + ErrorSummary::InvalidArgument, ErrorLevel::Permanent); + } + + if (fixed_address != 0) { + if (address != 0 && address != fixed_address) { + LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!", + GetObjectId(), address, name.c_str(), fixed_address); + // TODO: Verify error code with hardware + return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, + ErrorSummary::InvalidArgument, ErrorLevel::Permanent); + } + + // HACK(yuriks): This is only here to support the APT shared font mapping right now. + // Later, this should actually map the memory block onto the address space. + return RESULT_SUCCESS; + } + if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { - LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X outside of shared mem bounds!", - GetObjectId(), address); + LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!", + GetObjectId(), address, name.c_str()); // TODO: Verify error code with hardware return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, ErrorSummary::InvalidArgument, ErrorLevel::Permanent); diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index 7a2922776..35b550d12 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h @@ -61,6 +61,8 @@ public: /// Address of shared memory block in the process. VAddr base_address; + /// Fixed address to allow mapping to. Used for blocks created from the linear heap. + VAddr fixed_address; /// Size of the memory block. Page-aligned. 
u32 size; /// Permission restrictions applied to the process which created the block. diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 29ea6d531..c10126513 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -117,6 +117,7 @@ void Thread::Stop() { wait_objects.clear(); Kernel::g_current_process->used_tls_slots[tls_index] = false; + g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; HLE::Reschedule(__func__); } @@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, } ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); + g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used // to initialize the context @@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { } VAddr Thread::GetTLSAddress() const { - return Memory::TLS_AREA_VADDR + tls_index * 0x200; + return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE; } //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 205cc7b53..2610acf76 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -11,6 +11,15 @@ namespace Kernel { +static const char* GetMemoryStateName(MemoryState state) { + static const char* names[] = { + "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased", + "Alias", "AliasCode", "Locked", + }; + + return names[(int)state]; +} + bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { ASSERT(base + size == next.base); if (permissions != next.permissions || @@ -51,11 +60,15 @@ void VMManager::Reset() { } VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { - return std::prev(vma_map.upper_bound(target)); + if 
(target >= MAX_ADDRESS) { + return vma_map.end(); + } else { + return std::prev(vma_map.upper_bound(target)); + } } ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, - std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) { + std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) { ASSERT(block != nullptr); ASSERT(offset + size <= block->size()); @@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u3 return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); } -void VMManager::Unmap(VMAHandle vma_handle) { - VMAIter iter = StripIterConstness(vma_handle); - - VirtualMemoryArea& vma = iter->second; +VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) { + VirtualMemoryArea& vma = vma_handle->second; vma.type = VMAType::Free; vma.permissions = VMAPermission::None; vma.meminfo_state = MemoryState::Free; @@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) { UpdatePageTableForVMA(vma); - MergeAdjacent(iter); + return MergeAdjacent(vma_handle); +} + +ResultCode VMManager::UnmapRange(VAddr target, u32 size) { + CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); + VAddr target_end = target + size; + + VMAIter end = vma_map.end(); + // The comparison against the end of the range must be done using addresses since VMAs can be + // merged during this process, causing invalidation of the iterators. 
+ while (vma != end && vma->second.base < target_end) { + vma = std::next(Unmap(vma)); + } + + ASSERT(FindVMA(target)->second.size >= size); + return RESULT_SUCCESS; } -void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { +VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { VMAIter iter = StripIterConstness(vma_handle); VirtualMemoryArea& vma = iter->second; vma.permissions = new_perms; UpdatePageTableForVMA(vma); - MergeAdjacent(iter); + return MergeAdjacent(iter); +} + +ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) { + CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); + VAddr target_end = target + size; + + VMAIter end = vma_map.end(); + // The comparison against the end of the range must be done using addresses since VMAs can be + // merged during this process, causing invalidation of the iterators. + while (vma != end && vma->second.base < target_end) { + vma = std::next(StripIterConstness(Reprotect(vma, new_perms))); + } + + return RESULT_SUCCESS; } -void VMManager::LogLayout() const { +void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { + // If this ever proves to have a noticeable performance impact, allow users of the function to + // specify a specific range of addresses to limit the scan to. for (const auto& p : vma_map) { const VirtualMemoryArea& vma = p.second; - LOG_DEBUG(Kernel, "%08X - %08X size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size, + if (block == vma.backing_block.get()) { + UpdatePageTableForVMA(vma); + } + } +} + +void VMManager::LogLayout(Log::Level log_level) const { + for (const auto& p : vma_map) { + const VirtualMemoryArea& vma = p.second; + LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X size: %8X %c%c%c %s", + vma.base, vma.base + vma.size, vma.size, (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-', (u8)vma.permissions & (u8)VMAPermission::Write ? 
'W' : '-', - (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-'); + (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state)); } } @@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) { } ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { - ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size); - ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base); + ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size); + ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base); VMAIter vma_handle = StripIterConstness(FindVMA(base)); if (vma_handle == vma_map.end()) { // Target address is outside the range managed by the kernel - return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, - ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5 + return ERR_INVALID_ADDRESS; } VirtualMemoryArea& vma = vma_handle->second; if (vma.type != VMAType::Free) { // Region is already allocated - return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, - ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 + return ERR_INVALID_ADDRESS_STATE; } u32 start_in_vma = base - vma.base; @@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { if (end_in_vma > vma.size) { // Requested allocation doesn't fit inside VMA - return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, - ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 + return ERR_INVALID_ADDRESS_STATE; } if (end_in_vma != vma.size) { @@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { return MakeResult<VMAIter>(vma_handle); } +ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) { + ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 
0x%8X", size); + ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target); + + VAddr target_end = target + size; + ASSERT(target_end >= target); + ASSERT(target_end <= MAX_ADDRESS); + ASSERT(size > 0); + + VMAIter begin_vma = StripIterConstness(FindVMA(target)); + VMAIter i_end = vma_map.lower_bound(target_end); + for (auto i = begin_vma; i != i_end; ++i) { + if (i->second.type == VMAType::Free) { + return ERR_INVALID_ADDRESS_STATE; + } + } + + if (target != begin_vma->second.base) { + begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); + } + + VMAIter end_vma = StripIterConstness(FindVMA(target_end)); + if (end_vma != vma_map.end() && target_end != end_vma->second.base) { + end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); + } + + return MakeResult<VMAIter>(begin_vma); +} + VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) { VirtualMemoryArea& old_vma = vma_handle->second; VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index b3795a94a..4e95f1f0c 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -14,6 +14,14 @@ namespace Kernel { +const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5 + ErrorDescription::InvalidAddress, ErrorModule::OS, + ErrorSummary::InvalidArgument, ErrorLevel::Usage}; + +const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5 + ErrorDescription::InvalidAddress, ErrorModule::OS, + ErrorSummary::InvalidState, ErrorLevel::Usage}; + enum class VMAType : u8 { /// VMA represents an unmapped region of the address space. Free, @@ -75,7 +83,7 @@ struct VirtualMemoryArea { /// Memory block backing this VMA. std::shared_ptr<std::vector<u8>> backing_block = nullptr; /// Offset into the backing_memory the mapping starts from. - u32 offset = 0; + size_t offset = 0; // Settings for type = BackingMemory /// Pointer backing this VMA. 
It will not be destroyed or freed when the VMA is removed. @@ -141,7 +149,7 @@ public: * @param state MemoryState tag to attach to the VMA. */ ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, - u32 offset, u32 size, MemoryState state); + size_t offset, u32 size, MemoryState state); /** * Maps an unmanaged host memory pointer at a given address. @@ -163,14 +171,23 @@ public: */ ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state); - /// Unmaps the given VMA. - void Unmap(VMAHandle vma); + /// Unmaps a range of addresses, splitting VMAs as necessary. + ResultCode UnmapRange(VAddr target, u32 size); /// Changes the permissions of the given VMA. - void Reprotect(VMAHandle vma, VMAPermission new_perms); + VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms); + + /// Changes the permissions of a range of addresses, splitting VMAs as necessary. + ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms); + + /** + * Scans all VMAs and updates the page table range of any that use the given vector as backing + * memory. This should be called after any operation that causes reallocation of the vector. + */ + void RefreshMemoryBlockMappings(const std::vector<u8>* block); /// Dumps the address space layout to the log, for debugging - void LogLayout() const; + void LogLayout(Log::Level log_level) const; private: using VMAIter = decltype(vma_map)::iterator; @@ -178,6 +195,9 @@ private: /// Converts a VMAHandle to a mutable VMAIter. VMAIter StripIterConstness(const VMAHandle& iter); + /// Unmaps the given VMA. + VMAIter Unmap(VMAIter vma); + /** * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing * the appropriate error checking. @@ -185,6 +205,12 @@ private: ResultVal<VMAIter> CarveVMA(VAddr base, u32 size); /** + * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each + * end of the range. 
+ */ + ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size); + + /** * Splits a VMA in two, at the specified offset. * @returns the right side of the split, with the original iterator becoming the left side. */ diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 35402341b..ba66569b4 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp @@ -16,6 +16,7 @@ #include "core/hle/hle.h" #include "core/hle/kernel/event.h" #include "core/hle/kernel/mutex.h" +#include "core/hle/kernel/process.h" #include "core/hle/kernel/shared_memory.h" #include "core/hle/kernel/thread.h" @@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock; static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event -static std::vector<u8> shared_font; +static std::shared_ptr<std::vector<u8>> shared_font; static u32 cpu_percent; ///< CPU time available to the running application @@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) { void GetSharedFont(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - if (!shared_font.empty()) { - // TODO(bunnei): This function shouldn't copy the shared font every time it's called. - // Instead, it should probably map the shared font as RO memory. We don't currently have - // an easy way to do this, but the copy should be sufficient for now. - memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size()); + if (shared_font != nullptr) { + // TODO(yuriks): This is a hack to keep this working right now even with our completely + // broken shared memory system. 
+ shared_font_mem->fixed_address = SHARED_FONT_VADDR; + Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address, + shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); cmd_buff[1] = RESULT_SUCCESS.raw; // No error @@ -391,7 +393,6 @@ void Init() { // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file // "shared_font.bin" in the Citra "sysdata" directory. - shared_font.clear(); std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; FileUtil::CreateFullPath(filepath); // Create path if not already created @@ -399,8 +400,8 @@ void Init() { if (file.IsOpen()) { // Read shared font data - shared_font.resize((size_t)file.GetSize()); - file.ReadBytes(shared_font.data(), (size_t)file.GetSize()); + shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize()); + file.ReadBytes(shared_font->data(), shared_font->size()); // Create shared font memory object using Kernel::MemoryPermission; @@ -424,7 +425,7 @@ void Init() { } void Shutdown() { - shared_font.clear(); + shared_font = nullptr; shared_font_mem = nullptr; lock = nullptr; notification_event = nullptr; diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index e93c1b436..c3d0d28a5 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -3,8 +3,8 @@ // Refer to the license.txt file included. 
#include "common/bit_field.h" +#include "common/microprofile.h" -#include "core/mem_map.h" #include "core/memory.h" #include "core/hle/kernel/event.h" #include "core/hle/kernel/shared_memory.h" @@ -230,6 +230,10 @@ void SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) { if (Pica::g_debug_context) Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr); + + if (screen_id == 0) { + MicroProfileFlip(); + } } /** @@ -418,7 +422,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { case CommandId::SET_DISPLAY_TRANSFER: { - auto& params = command.image_copy; + auto& params = command.display_transfer; WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), @@ -433,17 +437,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { // TODO: Check if texture copies are implemented correctly.. 
case CommandId::SET_TEXTURE_COPY: { - auto& params = command.image_copy; - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), + auto& params = command.texture_copy; + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); - - // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? - WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), + params.size); + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), + params.in_width_gap); + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), + params.out_width_gap); + WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), + params.flags); + + // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter. 
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); break; } diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index c89d0a467..8bcb30ad1 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h @@ -127,7 +127,16 @@ struct Command { u32 in_buffer_size; u32 out_buffer_size; u32 flags; - } image_copy; + } display_transfer; + + struct { + u32 in_buffer_address; + u32 out_buffer_address; + u32 size; + u32 in_width_gap; + u32 out_width_gap; + u32 flags; + } texture_copy; u8 raw_data[0x1C]; }; diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp index 6e7dafaad..6b1b71fe4 100644 --- a/src/core/hle/service/y2r_u.cpp +++ b/src/core/hle/service/y2r_u.cpp @@ -10,7 +10,6 @@ #include "core/hle/kernel/event.h" #include "core/hle/service/y2r_u.h" #include "core/hw/y2r.h" -#include "core/mem_map.h" #include "video_core/renderer_base.h" #include "video_core/utils.h" diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp index 26d87c7e2..50c5bc01b 100644 --- a/src/core/hle/shared_page.cpp +++ b/src/core/hle/shared_page.cpp @@ -18,7 +18,4 @@ void Init() { shared_page.running_hw = 0x1; // product } -void Shutdown() { -} - } // namespace diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h index db6a5340b..379bb7b63 100644 --- a/src/core/hle/shared_page.h +++ b/src/core/hle/shared_page.h @@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st extern SharedPageDef shared_page; void Init(); -void Shutdown(); } // namespace diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index bb64fdfb7..19f750d72 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -5,16 +5,17 @@ #include <map> #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "common/string_util.h" #include "common/symbols.h" #include "core/core_timing.h" -#include "core/mem_map.h" #include 
"core/arm/arm_interface.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/event.h" +#include "core/hle/kernel/memory.h" #include "core/hle/kernel/mutex.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" @@ -41,32 +42,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel, const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E +const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1 + ErrorDescription::MisalignedAddress, ErrorModule::OS, + ErrorSummary::InvalidArgument, ErrorLevel::Usage}; +const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2 + ErrorDescription::MisalignedSize, ErrorModule::OS, + ErrorSummary::InvalidArgument, ErrorLevel::Usage}; +const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE + ErrorDescription::InvalidCombination, ErrorModule::OS, + ErrorSummary::InvalidArgument, ErrorLevel::Usage}; + enum ControlMemoryOperation { - MEMORY_OPERATION_HEAP = 0x00000003, - MEMORY_OPERATION_GSP_HEAP = 0x00010003, + MEMOP_FREE = 1, + MEMOP_RESERVE = 2, // This operation seems to be unsupported in the kernel + MEMOP_COMMIT = 3, + MEMOP_MAP = 4, + MEMOP_UNMAP = 5, + MEMOP_PROTECT = 6, + MEMOP_OPERATION_MASK = 0xFF, + + MEMOP_REGION_APP = 0x100, + MEMOP_REGION_SYSTEM = 0x200, + MEMOP_REGION_BASE = 0x300, + MEMOP_REGION_MASK = 0xF00, + + MEMOP_LINEAR = 0x10000, }; /// Map application or GSP heap memory static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) { - LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", + using namespace Kernel; + + LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X", operation, addr0, addr1, size, permissions); - switch (operation) { + if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 
0) { + return ERR_MISALIGNED_ADDRESS; + } + if ((size & Memory::PAGE_MASK) != 0) { + return ERR_MISALIGNED_SIZE; + } + + u32 region = operation & MEMOP_REGION_MASK; + operation &= ~MEMOP_REGION_MASK; + + if (region != 0) { + LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region); + } - // Map normal heap memory - case MEMORY_OPERATION_HEAP: - *out_addr = Memory::MapBlock_Heap(size, operation, permissions); + if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) { + return ERR_INVALID_COMBINATION; + } + VMAPermission vma_permissions = (VMAPermission)permissions; + + auto& process = *g_current_process; + + switch (operation & MEMOP_OPERATION_MASK) { + case MEMOP_FREE: + { + if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { + ResultCode result = process.HeapFree(addr0, size); + if (result.IsError()) return result; + } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) { + ResultCode result = process.LinearFree(addr0, size); + if (result.IsError()) return result; + } else { + return ERR_INVALID_ADDRESS; + } + *out_addr = addr0; break; + } - // Map GSP heap memory - case MEMORY_OPERATION_GSP_HEAP: - *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions); + case MEMOP_COMMIT: + { + if (operation & MEMOP_LINEAR) { + CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions)); + } else { + CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions)); + } + break; + } + + case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented + { + CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions)); break; + } + + case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented + { + ResultCode result = process.HeapFree(addr0, size); + if (result.IsError()) return result; + break; + } + + case MEMOP_PROTECT: + { + 
ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions); + if (result.IsError()) return result; + break; + } - // Unknown ControlMemory operation default: LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation); + return ERR_INVALID_COMBINATION; } + + process.vm_manager.LogLayout(Log::Level::Trace); + return RESULT_SUCCESS; } @@ -537,9 +620,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf if (process == nullptr) return ERR_INVALID_HANDLE; - auto vma = process->address_space->FindVMA(addr); + auto vma = process->vm_manager.FindVMA(addr); - if (vma == process->address_space->vma_map.end()) + if (vma == Kernel::g_current_process->vm_manager.vma_map.end()) return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); memory_info->base_address = vma->second.base; @@ -692,6 +775,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32 return RESULT_SUCCESS; } +static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) { + LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type); + + using Kernel::Process; + Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle); + if (process == nullptr) + return ERR_INVALID_HANDLE; + + switch (type) { + case 0: + case 2: + // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure + // what's the difference between them. 
+ *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; + break; + case 1: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + // These are valid, but not implemented yet + LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type); + break; + case 20: + *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase(); + break; + default: + LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type); + + if (type >= 21 && type <= 23) { + return ResultCode( // 0xE0E01BF4 + ErrorDescription::NotImplemented, ErrorModule::OS, + ErrorSummary::InvalidArgument, ErrorLevel::Usage); + } else { + return ResultCode( // 0xD8E007ED + ErrorDescription::InvalidEnumValue, ErrorModule::Kernel, + ErrorSummary::InvalidArgument, ErrorLevel::Permanent); + } + break; + } + + return RESULT_SUCCESS; +} + namespace { struct FunctionDef { using Func = void(); @@ -746,7 +875,7 @@ static const FunctionDef SVC_Table[] = { {0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"}, {0x29, nullptr, "GetHandleInfo"}, {0x2A, nullptr, "GetSystemInfo"}, - {0x2B, nullptr, "GetProcessInfo"}, + {0x2B, HLE::Wrap<GetProcessInfo>, "GetProcessInfo"}, {0x2C, nullptr, "GetThreadInfo"}, {0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"}, {0x2E, nullptr, "SendSyncRequest1"}, @@ -841,8 +970,11 @@ static const FunctionDef* GetSVCInfo(u32 func_num) { return &SVC_Table[func_num]; } +MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); + void CallSVC(u32 immediate) { Common::Profiling::ScopeTimer timer_svc(profiler_svc); + MICROPROFILE_SCOPE(Kernel_SVC); const FunctionDef* info = GetSVCInfo(immediate); if (info) { diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 3ccbc03b2..bc7bde903 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -3,11 +3,13 @@ // Refer to the license.txt file included. 
#include <cstring> +#include <numeric> #include <type_traits> #include "common/color.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "core/settings.h" @@ -84,6 +86,9 @@ static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_ } } +MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); + template <typename T> inline void Write(u32 addr, const T data) { addr -= HW::VADDR_GPU; @@ -149,6 +154,8 @@ inline void Write(u32 addr, const T data) { case GPU_REG_INDEX(display_transfer_config.trigger): { + MICROPROFILE_SCOPE(GPU_DisplayTransfer); + const auto& config = g_regs.display_transfer_config; if (config.trigger & 1) { @@ -158,14 +165,59 @@ inline void Write(u32 addr, const T data) { u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); + if (config.is_texture_copy) { + u32 input_width = config.texture_copy.input_width * 16; + u32 input_gap = config.texture_copy.input_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + u32 output_gap = config.texture_copy.output_gap * 16; + + size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); + VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); + + u32 remaining_size = config.texture_copy.size; + u32 remaining_input = input_width; + u32 remaining_output = output_width; + while (remaining_size > 0) { + u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); + + std::memcpy(dst_pointer, src_pointer, copy_size); + src_pointer += copy_size; + dst_pointer += copy_size; + + remaining_input -= copy_size; + remaining_output -= copy_size; + 
remaining_size -= copy_size; + + if (remaining_input == 0) { + remaining_input = input_width; + src_pointer += input_gap; + } + if (remaining_output == 0) { + remaining_output = output_width; + dst_pointer += output_gap; + } + } + + LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", + config.texture_copy.size, + config.GetPhysicalInputAddress(), input_width, input_gap, + config.GetPhysicalOutputAddress(), output_width, output_gap, + config.flags); + + size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); + + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); + break; + } + if (config.scaling > config.ScaleXY) { LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); UNIMPLEMENTED(); break; } - if (config.output_tiled && - (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { + if (config.input_linear && config.scaling != config.NoScale) { LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); UNIMPLEMENTED(); break; @@ -182,23 +234,6 @@ inline void Write(u32 addr, const T data) { VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); - if (config.raw_copy) { - // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions - // TODO(Subv): Verify if raw copies perform scaling - memcpy(dst_pointer, src_pointer, output_size); - - LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", - output_size, - config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), - config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), - config.output_format.Value(), config.flags); - - 
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); - - VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); - break; - } - for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { Math::Vec4<u8> src_color; @@ -220,7 +255,7 @@ inline void Write(u32 addr, const T data) { u32 src_offset; u32 dst_offset; - if (config.output_tiled) { + if (config.input_linear) { if (!config.dont_swizzle) { // Interpret the input as linear and the output as tiled u32 coarse_y = y & ~7; @@ -315,6 +350,8 @@ inline void Write(u32 addr, const T data) { const auto& config = g_regs.command_processor_config; if (config.trigger & 1) { + MICROPROFILE_SCOPE(GPU_CmdlistProcessing); + u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); if (Pica::g_debug_context && Pica::g_debug_context->recorder) { diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index daad506fe..2e3a9f779 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -201,12 +201,14 @@ struct Regs { u32 flags; BitField< 0, 1, u32> flip_vertically; // flips input data vertically - BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format - BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing + BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format + BitField< 2, 1, u32> crop_input_lines; + BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields BitField< 5, 1, u32> dont_swizzle; BitField< 8, 3, PixelFormat> input_format; BitField<12, 3, PixelFormat> output_format; - + /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. 
+ BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer }; @@ -214,10 +216,30 @@ struct Regs { // it seems that writing to this field triggers the display transfer u32 trigger; + + INSERT_PADDING_WORDS(0x1); + + struct { + u32 size; + + union { + u32 input_size; + + BitField< 0, 16, u32> input_width; + BitField<16, 16, u32> input_gap; + }; + + union { + u32 output_size; + + BitField< 0, 16, u32> output_width; + BitField<16, 16, u32> output_gap; + }; + } texture_copy; } display_transfer_config; - ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); + ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); - INSERT_PADDING_WORDS(0x331); + INSERT_PADDING_WORDS(0x32D); struct { // command list size (in bytes) diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp deleted file mode 100644 index cbe993fbe..000000000 --- a/src/core/mem_map.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <map> -#include <memory> -#include <utility> -#include <vector> - -#include "common/common_types.h" -#include "common/logging/log.h" - -#include "core/hle/config_mem.h" -#include "core/hle/kernel/vm_manager.h" -#include "core/hle/result.h" -#include "core/hle/shared_page.h" -#include "core/mem_map.h" -#include "core/memory.h" -#include "core/memory_setup.h" - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -namespace Memory { - -namespace { - -struct MemoryArea { - u32 base; - u32 size; - const char* name; -}; - -// We don't declare the IO regions in here since its handled by other means. 
-static MemoryArea memory_areas[] = { - {HEAP_VADDR, HEAP_SIZE, "Heap"}, // Application heap (main memory) - {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory - {LINEAR_HEAP_VADDR, LINEAR_HEAP_SIZE, "Linear Heap"}, // Linear heap (main memory) - {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) - {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory - {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory -}; - -/// Represents a block of memory mapped by ControlMemory/MapMemoryBlock -struct MemoryBlock { - MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) { - } - u32 handle; - u32 base_address; - u32 address; - u32 size; - u32 operation; - u32 permissions; - - const u32 GetVirtualAddress() const{ - return base_address + address; - } -}; - -static std::map<u32, MemoryBlock> heap_map; -static std::map<u32, MemoryBlock> heap_linear_map; - -} - -u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { - MemoryBlock block; - - block.base_address = HEAP_VADDR; - block.size = size; - block.operation = operation; - block.permissions = permissions; - - if (heap_map.size() > 0) { - const MemoryBlock last_block = heap_map.rbegin()->second; - block.address = last_block.address + last_block.size; - } - heap_map[block.GetVirtualAddress()] = block; - - return block.GetVirtualAddress(); -} - -u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) { - MemoryBlock block; - - block.base_address = LINEAR_HEAP_VADDR; - block.size = size; - block.operation = operation; - block.permissions = permissions; - - if (heap_linear_map.size() > 0) { - const MemoryBlock last_block = heap_linear_map.rbegin()->second; - block.address = last_block.address + last_block.size; - } - heap_linear_map[block.GetVirtualAddress()] = block; - - return block.GetVirtualAddress(); -} - -PAddr VirtualToPhysicalAddress(const VAddr addr) { - if (addr == 0) { - return 0; - } else if (addr >= VRAM_VADDR && addr < 
VRAM_VADDR_END) { - return addr - VRAM_VADDR + VRAM_PADDR; - } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { - return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; - } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { - return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; - } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { - return addr - IO_AREA_VADDR + IO_AREA_PADDR; - } - - LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr); - // To help with debugging, set bit on address so that it's obviously invalid. - return addr | 0x80000000; -} - -VAddr PhysicalToVirtualAddress(const PAddr addr) { - if (addr == 0) { - return 0; - } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { - return addr - VRAM_PADDR + VRAM_VADDR; - } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { - return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR; - } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { - return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; - } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { - return addr - IO_AREA_PADDR + IO_AREA_VADDR; - } - - LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr); - // To help with debugging, set bit on address so that it's obviously invalid. 
- return addr | 0x80000000; -} - -void Init() { - InitMemoryMap(); - LOG_DEBUG(HW_Memory, "initialized OK"); -} - -void InitLegacyAddressSpace(Kernel::VMManager& address_space) { - using namespace Kernel; - - for (MemoryArea& area : memory_areas) { - auto block = std::make_shared<std::vector<u8>>(area.size); - address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); - } - - auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, - (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); - address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); - - auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, - (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); - address_space.Reprotect(shared_page_vma, VMAPermission::Read); -} - -void Shutdown() { - heap_map.clear(); - heap_linear_map.clear(); - - LOG_DEBUG(HW_Memory, "shutdown OK"); -} - -} // namespace diff --git a/src/core/mem_map.h b/src/core/mem_map.h deleted file mode 100644 index 229ef82c5..000000000 --- a/src/core/mem_map.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/common_types.h" - -namespace Kernel { -class VMManager; -} - -namespace Memory { - -void Init(); -void InitLegacyAddressSpace(Kernel::VMManager& address_space); -void Shutdown(); - -/** - * Maps a block of memory on the heap - * @param size Size of block in bytes - * @param operation Memory map operation type - * @param permissions Memory allocation permissions - */ -u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions); - -/** - * Maps a block of memory on the GSP heap - * @param size Size of block in bytes - * @param operation Memory map operation type - * @param permissions Control memory permissions - */ -u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions); - -/** - * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical - * address. This should be used by services to translate addresses for use by the hardware. - */ -PAddr VirtualToPhysicalAddress(VAddr addr); - -/** - * Undoes a mapping performed by VirtualToPhysicalAddress(). 
- */ -VAddr PhysicalToVirtualAddress(PAddr addr); - -} // namespace diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 1f66bb27d..cde390b8a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -9,7 +9,7 @@ #include "common/logging/log.h" #include "common/swap.h" -#include "core/mem_map.h" +#include "core/hle/kernel/process.h" #include "core/memory.h" #include "core/memory_setup.h" @@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) { Write8(addr + offset, data[offset]); } +PAddr VirtualToPhysicalAddress(const VAddr addr) { + if (addr == 0) { + return 0; + } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) { + return addr - VRAM_VADDR + VRAM_PADDR; + } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { + return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; + } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { + return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; + } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { + return addr - IO_AREA_VADDR + IO_AREA_PADDR; + } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { + return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR; + } + + LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr); + // To help with debugging, set bit on address so that it's obviously invalid. 
+ return addr | 0x80000000; +} + +VAddr PhysicalToVirtualAddress(const PAddr addr) { + if (addr == 0) { + return 0; + } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { + return addr - VRAM_PADDR + VRAM_VADDR; + } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { + return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase(); + } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { + return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; + } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { + return addr - IO_AREA_PADDR + IO_AREA_VADDR; + } + + LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr); + // To help with debugging, set bit on address so that it's obviously invalid. + return addr | 0x80000000; +} + } // namespace diff --git a/src/core/memory.h b/src/core/memory.h index 418609de0..5af72b7a7 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -15,6 +15,8 @@ namespace Memory { * be mapped. */ const u32 PAGE_SIZE = 0x1000; +const u32 PAGE_MASK = PAGE_SIZE - 1; +const int PAGE_BITS = 12; /// Physical memory regions as seen from the ARM11 enum : PAddr { @@ -103,8 +105,15 @@ enum : VAddr { // hardcoded value. /// Area where TLS (Thread-Local Storage) buffers are allocated. TLS_AREA_VADDR = 0x1FF82000, - TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads + TLS_ENTRY_SIZE = 0x200, + TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, + + + /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 
+ NEW_LINEAR_HEAP_VADDR = 0x30000000, + NEW_LINEAR_HEAP_SIZE = 0x10000000, + NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE, }; u8 Read8(VAddr addr); @@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size); u8* GetPointer(VAddr virtual_address); /** +* Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical +* address. This should be used by services to translate addresses for use by the hardware. +*/ +PAddr VirtualToPhysicalAddress(VAddr addr); + +/** +* Undoes a mapping performed by VirtualToPhysicalAddress(). +*/ +VAddr PhysicalToVirtualAddress(PAddr addr); + +/** * Gets a pointer to the memory region beginning at the specified physical address. * * @note This is currently implemented using PhysicalToVirtualAddress(). diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h index 361bfc816..84ff30120 100644 --- a/src/core/memory_setup.h +++ b/src/core/memory_setup.h @@ -10,9 +10,6 @@ namespace Memory { -const u32 PAGE_MASK = PAGE_SIZE - 1; -const int PAGE_BITS = 12; - void InitMemoryMap(); /** diff --git a/src/core/system.cpp b/src/core/system.cpp index 561ff82f0..3cd84bf5e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -4,11 +4,11 @@ #include "core/core.h" #include "core/core_timing.h" -#include "core/mem_map.h" #include "core/system.h" #include "core/hw/hw.h" #include "core/hle/hle.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/memory.h" #include "video_core/video_core.h" @@ -29,7 +29,6 @@ void Shutdown() { HLE::Shutdown(); Kernel::Shutdown(); HW::Shutdown(); - Memory::Shutdown(); CoreTiming::Shutdown(); Core::Shutdown(); } diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d82e20f86..a78985510 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -4,6 +4,7 @@ #include <boost/range/algorithm/fill.hpp> +#include "common/microprofile.h" #include 
"common/profiler.h" #include "core/hle/service/gsp_gpu.h" @@ -43,6 +44,8 @@ static const u32 expand_bits_to_bytes[] = { 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff }; +MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); + static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -126,6 +129,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(trigger_draw_indexed): { Common::Profiling::ScopeTimer scope_timer(category_drawing); + MICROPROFILE_SCOPE(GPU_Drawing); #if PICA_LOG_TEV DebugUtils::DumpTevStageConfig(regs.GetTevStages()); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 8ad77f0c8..059445f7d 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -25,6 +25,8 @@ #include "common/math_util.h" #include "common/vector_math.h" +#include "core/settings.h" + #include "video_core/pica.h" #include "video_core/renderer_base.h" #include "video_core/utils.h" @@ -45,8 +47,10 @@ void DebugContext::OnEvent(Event event, void* data) { { std::unique_lock<std::mutex> lock(breakpoint_mutex); - // Commit the hardware renderer's framebuffer so it will show on debug widgets - VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); + if (Settings::values.use_hw_renderer) { + // Commit the hardware renderer's framebuffer so it will show on debug widgets + VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); + } // TODO: Should stop the CPU thread here once we multithread emulation. 
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 87cf705e7..f40684d83 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -1033,12 +1033,20 @@ struct float24 { return ret; } + static float24 Zero() { + return FromFloat32(0.f); + } + // Not recommended for anything but logging float ToFloat32() const { return value; } float24 operator * (const float24& flt) const { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + return Zero(); return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); } @@ -1055,7 +1063,11 @@ struct float24 { } float24& operator *= (const float24& flt) { - value *= flt.ToFloat32(); + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + *this = Zero(); + else value *= flt.ToFloat32(); return *this; } diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index b6c0e1bff..77eadda9e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -7,6 +7,7 @@ #include "common/color.h" #include "common/common_types.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "core/hw/gpu.h" @@ -286,6 +287,7 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, }; static Common::Profiling::TimingCategory rasterization_category("Rasterization"); +MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); /** * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing @@ -298,6 +300,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, { const auto& regs = g_state.regs; Common::Profiling::ScopeTimer timer(rasterization_category); + MICROPROFILE_SCOPE(GPU_Rasterization); // vertex positions in rasterizer coordinates static auto FloatToFix = 
[](float24 flt) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c3829d5c6..d29049508 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -7,6 +7,7 @@ #include "common/color.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "core/hw/gpu.h" @@ -230,8 +231,8 @@ void RasterizerOpenGL::DrawTriangles() { u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size); - res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size); + res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true); + res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true); } void RasterizerOpenGL::CommitFramebuffer() { @@ -786,12 +787,16 @@ void RasterizerOpenGL::SyncDrawState() { state.Apply(); } +MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); + void RasterizerOpenGL::ReloadColorBuffer() { u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); if (color_buffer == nullptr) return; + MICROPROFILE_SCOPE(OpenGL_FramebufferReload); + u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]); @@ -831,6 +836,8 @@ void RasterizerOpenGL::ReloadDepthBuffer() { if (depth_buffer == nullptr) return; + MICROPROFILE_SCOPE(OpenGL_FramebufferReload); + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); // OpenGL needs 4 bpp alignment for D24 @@ -884,6 +891,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { } Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); 
+MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); void RasterizerOpenGL::CommitColorBuffer() { if (last_fb_color_addr != 0) { @@ -891,6 +899,7 @@ void RasterizerOpenGL::CommitColorBuffer() { if (color_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); + MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format); @@ -927,6 +936,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { if (depth_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); + MICROPROFILE_SCOPE(OpenGL_FramebufferCommit); u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 70f0ba5f1..1e38c2e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/hash.h" #include "common/make_unique.h" #include "common/math_util.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "core/memory.h" @@ -16,15 +18,18 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FullFlush(); } +MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); + void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { PAddr texture_addr = config.config.GetPhysicalAddress(); - const auto cached_texture = texture_cache.find(texture_addr); if (cached_texture != texture_cache.end()) { state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; state.Apply(); } else { + MICROPROFILE_SCOPE(OpenGL_TextureUpload); + std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>(); new_texture->texture.Create(); @@ -46,12 +51,14 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text } const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); + u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); new_texture->width = info.width; new_texture->height = info.height; - new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format); + new_texture->size = info.stride * info.height; + new_texture->addr = texture_addr; + new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); - u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr); std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); for (int y = 0; y < info.height; ++y) { @@ -66,12 +73,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text } } -void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) { +void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) { // 
Flush any texture that falls in the flushed region // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound auto cache_upper_bound = texture_cache.upper_bound(addr + size); + for (auto it = texture_cache.begin(); it != cache_upper_bound;) { - if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) { + const auto& info = *it->second; + + // Flush the texture only if the memory region intersects and a change is detected + if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && + (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { + it = texture_cache.erase(it); } else { ++it; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 96f3a925c..d8f9edf59 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -19,7 +19,7 @@ public: void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config); /// Flush any cached resource that touches the flushed region - void NotifyFlush(PAddr addr, u32 size); + void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false); /// Flush all cached OpenGL resources tracked by this cache manager void FullFlush(); @@ -30,6 +30,8 @@ private: GLuint width; GLuint height; u32 size; + u64 hash; + PAddr addr; }; std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 4e9836c80..f89117521 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -9,6 +9,7 @@ #include "common/hash.h" #include "common/make_unique.h" +#include "common/microprofile.h" #include "common/profiler.h" #include "video_core/debug_utils/debug_utils.h" @@ -51,15 +52,19 @@ void Setup(UnitState<false>& 
state) { } void Shutdown() { +#ifdef ARCHITECTURE_x86_64 shader_map.clear(); +#endif // ARCHITECTURE_x86_64 } static Common::Profiling::TimingCategory shader_category("Vertex Shader"); +MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; Common::Profiling::ScopeTimer timer(shader_category); + MICROPROFILE_SCOPE(GPU_VertexShader); state.program_counter = config.main_offset; state.debug.max_offset = 0; diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index e14de0768..69e4efa68 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -177,7 +177,10 @@ void RunInterpreter(UnitState<Debug>& state) { if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = std::max(src1[i], src2[i]); + // NOTE: Exact form required to match NaN semantics to hardware: + // max(0, NaN) -> NaN + // max(NaN, 0) -> 0 + dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i]; } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; @@ -190,19 +193,29 @@ void RunInterpreter(UnitState<Debug>& state) { if (!swizzle.DestComponentEnabled(i)) continue; - dest[i] = std::min(src1[i], src2[i]); + // NOTE: Exact form required to match NaN semantics to hardware: + // min(0, NaN) -> NaN + // min(NaN, 0) -> 0 + dest[i] = (src1[i] < src2[i]) ? 
src1[i] : src2[i]; } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; case OpCode::Id::DP3: case OpCode::Id::DP4: + case OpCode::Id::DPH: + case OpCode::Id::DPHI: { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + + OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); + if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) + src1[3] = float24::FromFloat32(1.0f); + float24 dot = float24::FromFloat32(0.f); - int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; + int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) dot = dot + src1[i] * src2[i]; @@ -221,13 +234,12 @@ void RunInterpreter(UnitState<Debug>& state) { { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; - // TODO: Be stable against division by zero! - // TODO: I think this might be wrong... we should only use one component here - dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); + dest[i] = rcp_res; } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; @@ -238,13 +250,12 @@ void RunInterpreter(UnitState<Debug>& state) { { Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; - // TODO: Be stable against division by zero! - // TODO: I think this might be wrong... 
we should only use one component here - dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); + dest[i] = rsq_res; } Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); break; @@ -278,6 +289,20 @@ void RunInterpreter(UnitState<Debug>& state) { break; } + case OpCode::Id::SGE: + case OpCode::Id::SGEI: + Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); + Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); + Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + } + Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + break; + case OpCode::Id::SLT: case OpCode::Id::SLTI: Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); @@ -334,6 +359,42 @@ void RunInterpreter(UnitState<Debug>& state) { Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); break; + case OpCode::Id::EX2: + { + Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); + Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + + // EX2 only takes first component exp2 and writes it to all dest components + float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = ex2_res; + } + + Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + break; + } + + case OpCode::Id::LG2: + { + Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); + Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); + + // LG2 only takes the first component log2 and writes it to all dest components + float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = lg2_res; + } + + 
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 836942c6b..d3cfe109e 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -23,14 +23,14 @@ const JitFunction instr_table[64] = { &JitCompiler::Compile_ADD, // add &JitCompiler::Compile_DP3, // dp3 &JitCompiler::Compile_DP4, // dp4 - nullptr, // dph + &JitCompiler::Compile_DPH, // dph nullptr, // unknown - nullptr, // ex2 - nullptr, // lg2 + &JitCompiler::Compile_EX2, // ex2 + &JitCompiler::Compile_LG2, // lg2 nullptr, // unknown &JitCompiler::Compile_MUL, // mul - nullptr, // lge - nullptr, // slt + &JitCompiler::Compile_SGE, // sge + &JitCompiler::Compile_SLT, // slt &JitCompiler::Compile_FLR, // flr &JitCompiler::Compile_MAX, // max &JitCompiler::Compile_MIN, // min @@ -44,10 +44,10 @@ const JitFunction instr_table[64] = { nullptr, // unknown nullptr, // unknown nullptr, // unknown - nullptr, // dphi + &JitCompiler::Compile_DPH, // dphi nullptr, // unknown - nullptr, // sgei - &JitCompiler::Compile_SLTI, // slti + &JitCompiler::Compile_SGE, // sgei + &JitCompiler::Compile_SLT, // slti nullptr, // unknown nullptr, // unknown nullptr, // unknown @@ -115,6 +115,8 @@ static const X64Reg SRC1 = XMM1; static const X64Reg SRC2 = XMM2; /// Loaded with the third swizzled source register, otherwise can be used as a scratch register static const X64Reg SRC3 = XMM3; +/// Additional scratch register +static const X64Reg SCRATCH2 = XMM4; /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one static const X64Reg ONE = XMM14; /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR @@ -227,8 +229,8 @@ 
void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); BLENDPS(SCRATCH, R(src), mask); } else { - MOVAPS(XMM4, R(src)); - UNPCKHPS(XMM4, R(SCRATCH)); // Unpack X/Y components of source and destination + MOVAPS(SCRATCH2, R(src)); + UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination // Compute selector to selectively copy source components to destination for SHUFPS instruction @@ -236,7 +238,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) | ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) | ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6); - SHUFPS(SCRATCH, R(XMM4), sel); + SHUFPS(SCRATCH, R(SCRATCH2), sel); } // Store dest back to memory @@ -244,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { } } +void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { + MOVAPS(scratch, R(src1)); + CMPPS(scratch, R(src2), CMP_ORD); + + MULPS(src1, R(src2)); + + MOVAPS(src2, R(src1)); + CMPPS(src2, R(src2), CMP_UNORD); + + XORPS(scratch, R(src2)); + ANDPS(src1, R(scratch)); +} + void JitCompiler::Compile_EvaluateCondition(Instruction instr) { // Note: NXOR is used below to check for equality switch (instr.flow_control.op) { @@ -280,6 +295,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); } +void JitCompiler::Compile_PushCallerSavedXMM() { +#ifndef _WIN32 + SUB(64, R(RSP), Imm8(2 * 16)); + MOVUPS(MDisp(RSP, 16), ONE); + MOVUPS(MDisp(RSP, 0), NEGBIT); +#endif +} + +void JitCompiler::Compile_PopCallerSavedXMM() { +#ifndef _WIN32 + MOVUPS(NEGBIT, MDisp(RSP, 0)); + MOVUPS(ONE, MDisp(RSP, 16)); + ADD(64, R(RSP), Imm8(2 * 16)); 
+#endif +} + void JitCompiler::Compile_ADD(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -291,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); - if (Common::GetCPUCaps().sse4_1) { - DPPS(SRC1, R(SRC2), 0x7f); - } else { - MULPS(SRC1, R(SRC2)); + Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - MOVAPS(SRC2, R(SRC1)); - SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1)); + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1)); - MOVAPS(SRC3, R(SRC1)); - SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2)); + MOVAPS(SRC3, R(SRC1)); + SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2)); - SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); - ADDPS(SRC1, R(SRC2)); - ADDPS(SRC1, R(SRC3)); - } + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); + ADDPS(SRC1, R(SRC2)); + ADDPS(SRC1, R(SRC3)); Compile_DestEnable(instr, SRC1); } @@ -314,27 +341,117 @@ void JitCompiler::Compile_DP4(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + Compile_SanitizedMul(SRC1, SRC2, SCRATCH); + + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY + ADDPS(SRC1, R(SRC2)); + + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX + ADDPS(SRC1, R(SRC2)); + + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_DPH(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + if (Common::GetCPUCaps().sse4_1) { - DPPS(SRC1, R(SRC2), 0xff); + // Set 4th component to 
1.0 + BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 } else { - MULPS(SRC1, R(SRC2)); + // Set 4th component to 1.0 + MOVAPS(SCRATCH, R(SRC1)); + UNPCKHPS(SCRATCH, R(ONE)); // XYZW, 1111 -> Z1__ + UNPCKLPD(SRC1, R(SCRATCH)); // XYZW, Z1__ -> XYZ1 + } - MOVAPS(SRC2, R(SRC1)); - SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY - ADDPS(SRC1, R(SRC2)); + Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - MOVAPS(SRC2, R(SRC1)); - SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX - ADDPS(SRC1, R(SRC2)); - } + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY + ADDPS(SRC1, R(SRC2)); + + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX + ADDPS(SRC1, R(SRC2)); + + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_EX2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + // The following will actually break the stack alignment + ABI_PushAllCallerSavedRegsAndAdjustStack(); + Compile_PushCallerSavedXMM(); + ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); + Compile_PopCallerSavedXMM(); + ABI_PopAllCallerSavedRegsAndAdjustStack(); + + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_LG2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + + // The following will actually break the stack alignment + ABI_PushAllCallerSavedRegsAndAdjustStack(); + Compile_PushCallerSavedXMM(); + ABI_CallFunction(reinterpret_cast<const void*>(log2f)); + Compile_PopCallerSavedXMM(); + ABI_PopAllCallerSavedRegsAndAdjustStack(); + + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); Compile_DestEnable(instr, SRC1); } void JitCompiler::Compile_MUL(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); - 
MULPS(SRC1, R(SRC2)); + Compile_SanitizedMul(SRC1, SRC2, SCRATCH); + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_SGE(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + + CMPPS(SRC1, R(SRC2), CMP_NLT); + ANDPS(SRC1, R(ONE)); + + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_SLT(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + + CMPPS(SRC1, R(SRC2), CMP_LT); + ANDPS(SRC1, R(ONE)); + Compile_DestEnable(instr, SRC1); } @@ -354,6 +471,7 @@ void JitCompiler::Compile_FLR(Instruction instr) { void JitCompiler::Compile_MAX(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. MAXPS(SRC1, R(SRC2)); Compile_DestEnable(instr, SRC1); } @@ -361,6 +479,7 @@ void JitCompiler::Compile_MAX(Instruction instr) { void JitCompiler::Compile_MIN(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 
MINPS(SRC1, R(SRC2)); Compile_DestEnable(instr, SRC1); } @@ -374,8 +493,8 @@ void JitCompiler::Compile_MOVA(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // Convert floats to integers (only care about X and Y components) - CVTPS2DQ(SRC1, R(SRC1)); + // Convert floats to integers using truncation (only care about X and Y components) + CVTTPS2DQ(SRC1, R(SRC1)); // Get result MOVQ_xmm(R(RAX), SRC1); @@ -415,22 +534,13 @@ void JitCompiler::Compile_MOV(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_SLTI(Instruction instr) { - Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); - Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2); - - CMPSS(SRC1, R(SRC2), CMP_LT); - ANDPS(SRC1, R(ONE)); - - Compile_DestEnable(instr, SRC1); -} - void JitCompiler::Compile_RCP(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica + // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica // performs this operation more accurately. This should be checked on hardware. - RCPPS(SRC1, R(SRC1)); + RCPSS(SRC1, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX Compile_DestEnable(instr, SRC1); } @@ -438,9 +548,10 @@ void JitCompiler::Compile_RCP(Instruction instr) { void JitCompiler::Compile_RSQ(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica + // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica // performs this operation more accurately. This should be checked on hardware. 
- RSQRTPS(SRC1, R(SRC1)); + RSQRTSS(SRC1, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX Compile_DestEnable(instr, SRC1); } @@ -475,27 +586,42 @@ void JitCompiler::Compile_CALLU(Instruction instr) { } void JitCompiler::Compile_CMP(Instruction instr) { + using Op = Instruction::Common::CompareOpType::Op; + Op op_x = instr.common.compare_op.x; + Op op_y = instr.common.compare_op.y; + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); - static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT }; + // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to + // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here + // because they don't match when used with NaNs. + static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE }; + + bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual); + Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1; + Gen::X64Reg rhs_x = invert_op_x ? SRC1 : SRC2; - if (instr.common.compare_op.x == instr.common.compare_op.y) { + if (op_x == op_y) { // Compare X-component and Y-component together - CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]); + CMPPS(lhs_x, R(rhs_x), cmp[op_x]); + MOVQ_xmm(R(COND0), lhs_x); - MOVQ_xmm(R(COND0), SRC1); MOV(64, R(COND1), R(COND0)); } else { + bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual); + Gen::X64Reg lhs_y = invert_op_y ? SRC2 : SRC1; + Gen::X64Reg rhs_y = invert_op_y ? 
SRC1 : SRC2; + // Compare X-component - MOVAPS(SCRATCH, R(SRC1)); - CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]); + MOVAPS(SCRATCH, R(lhs_x)); + CMPSS(SCRATCH, R(rhs_x), cmp[op_x]); // Compare Y-component - CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]); + CMPPS(lhs_y, R(rhs_y), cmp[op_y]); MOVQ_xmm(R(COND0), SCRATCH); - MOVQ_xmm(R(COND1), SRC1); + MOVQ_xmm(R(COND1), lhs_y); } SHR(32, R(COND0), Imm8(31)); @@ -513,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3); } - if (Common::GetCPUCaps().fma) { - VFMADD213PS(SRC1, SRC2, R(SRC3)); - } else { - MULPS(SRC1, R(SRC2)); - ADDPS(SRC1, R(SRC3)); - } + Compile_SanitizedMul(SRC1, SRC2, SCRATCH); + ADDPS(SRC1, R(SRC3)); Compile_DestEnable(instr, SRC1); } @@ -646,12 +768,12 @@ CompiledShader* JitCompiler::Compile() { // Used to set a register to one static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; MOV(PTRBITS, R(RAX), ImmPtr(&one)); - MOVAPS(ONE, MDisp(RAX, 0)); + MOVAPS(ONE, MatR(RAX)); // Used to negate registers static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); - MOVAPS(NEGBIT, MDisp(RAX, 0)); + MOVAPS(NEGBIT, MatR(RAX)); looping = false; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b88f2a0d2..58828ecc8 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -37,7 +37,12 @@ public: void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); void Compile_DP4(Instruction instr); + void Compile_DPH(Instruction instr); + void Compile_EX2(Instruction instr); + void Compile_LG2(Instruction instr); void Compile_MUL(Instruction instr); + void Compile_SGE(Instruction instr); + void Compile_SLT(Instruction instr); void Compile_FLR(Instruction instr); void Compile_MAX(Instruction instr); void Compile_MIN(Instruction instr); @@ -45,7 +50,6 @@ public: void Compile_RSQ(Instruction instr); void 
Compile_MOVA(Instruction instr); void Compile_MOV(Instruction instr); - void Compile_SLTI(Instruction instr); void Compile_NOP(Instruction instr); void Compile_END(Instruction instr); void Compile_CALL(Instruction instr); @@ -64,9 +68,18 @@ private: void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); + /** + * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying + * zero by inf. Clobbers `src2` and `scratch`. + */ + void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch); + void Compile_EvaluateCondition(Instruction instr); void Compile_UniformCondition(Instruction instr); + void Compile_PushCallerSavedXMM(); + void Compile_PopCallerSavedXMM(); + /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. unsigned* offset_ptr = nullptr; |