From 6e7cad63b4f0ac3d3fb84d230125b413fbb2d011 Mon Sep 17 00:00:00 2001 From: Judah Caruso Date: Sat, 17 May 2025 02:20:40 -0600 Subject: [PATCH] add basic remotery bindings --- remotery/examples/it-works.jai | 50 + remotery/generate.jai | 43 + remotery/lib/Remotery.c | 11107 ++++++++++++++++ remotery/lib/Remotery.h | 1216 ++ remotery/lib/RemoteryMetal.mm | 59 + remotery/mac/remotery.a | Bin 0 -> 92008 bytes remotery/mac/remotery.dylib | Bin 0 -> 95024 bytes remotery/module.jai | 24 + remotery/remotery.jai | 799 ++ remotery/vis/Code/Console.js | 218 + remotery/vis/Code/DataViewReader.js | 94 + remotery/vis/Code/GLCanvas.js | 123 + remotery/vis/Code/GridWindow.js | 291 + remotery/vis/Code/MouseInteraction.js | 106 + remotery/vis/Code/NameMap.js | 53 + remotery/vis/Code/PixelTimeRange.js | 61 + remotery/vis/Code/Remotery.js | 739 + remotery/vis/Code/SampleGlobals.js | 28 + remotery/vis/Code/Shaders/Grid.js | 162 + remotery/vis/Code/Shaders/Shared.js | 154 + remotery/vis/Code/Shaders/Timeline.js | 337 + remotery/vis/Code/Shaders/Window.js | 33 + remotery/vis/Code/ThreadFrame.js | 34 + remotery/vis/Code/TimelineMarkers.js | 186 + remotery/vis/Code/TimelineRow.js | 400 + remotery/vis/Code/TimelineWindow.js | 496 + remotery/vis/Code/TitleWindow.js | 105 + remotery/vis/Code/TraceDrop.js | 147 + remotery/vis/Code/WebGL.js | 252 + remotery/vis/Code/WebGLFont.js | 125 + remotery/vis/Code/WebSocketConnection.js | 149 + .../Fonts/FiraCode/FiraCode-Regular.ttf | Bin 0 -> 299152 bytes remotery/vis/Styles/Fonts/FiraCode/LICENSE | 93 + remotery/vis/Styles/Remotery.css | 237 + .../extern/BrowserLib/Core/Code/Animation.js | 65 + .../vis/extern/BrowserLib/Core/Code/Bind.js | 92 + .../extern/BrowserLib/Core/Code/Convert.js | 218 + .../vis/extern/BrowserLib/Core/Code/Core.js | 26 + .../vis/extern/BrowserLib/Core/Code/DOM.js | 526 + .../extern/BrowserLib/Core/Code/Keyboard.js | 149 + .../extern/BrowserLib/Core/Code/LocalStore.js | 40 + .../vis/extern/BrowserLib/Core/Code/Mouse.js | 83 + .../BrowserLib/Core/Code/MurmurHash3.js | 68 + .../BrowserLib/WindowManager/Code/Button.js | 131 + .../BrowserLib/WindowManager/Code/ComboBox.js | 237 + .../WindowManager/Code/Container.js | 48 + .../BrowserLib/WindowManager/Code/EditBox.js | 119 + .../BrowserLib/WindowManager/Code/Grid.js | 248 + .../BrowserLib/WindowManager/Code/Label.js | 31 + .../BrowserLib/WindowManager/Code/Treeview.js | 352 + .../WindowManager/Code/TreeviewItem.js | 109 + .../BrowserLib/WindowManager/Code/Window.js | 318 + .../WindowManager/Code/WindowManager.js | 65 + .../WindowManager/Styles/WindowManager.css | 652 + remotery/vis/index.html | 69 + 55 files changed, 21567 insertions(+) create mode 100644 remotery/examples/it-works.jai create mode 100644 remotery/generate.jai create mode 100644 remotery/lib/Remotery.c create mode 100644 remotery/lib/Remotery.h create mode 100644 remotery/lib/RemoteryMetal.mm create mode 100644 remotery/mac/remotery.a create mode 100755 remotery/mac/remotery.dylib create mode 100644 remotery/module.jai create mode 100644 remotery/remotery.jai create mode 100644 remotery/vis/Code/Console.js create mode 100644 remotery/vis/Code/DataViewReader.js create mode 100644 remotery/vis/Code/GLCanvas.js create mode 100644 remotery/vis/Code/GridWindow.js create mode 100644 remotery/vis/Code/MouseInteraction.js create mode 100644 remotery/vis/Code/NameMap.js create mode 100644 remotery/vis/Code/PixelTimeRange.js create mode 100644 remotery/vis/Code/Remotery.js create mode 100644 remotery/vis/Code/SampleGlobals.js create mode 100644 
remotery/vis/Code/Shaders/Grid.js create mode 100644 remotery/vis/Code/Shaders/Shared.js create mode 100644 remotery/vis/Code/Shaders/Timeline.js create mode 100644 remotery/vis/Code/Shaders/Window.js create mode 100644 remotery/vis/Code/ThreadFrame.js create mode 100644 remotery/vis/Code/TimelineMarkers.js create mode 100644 remotery/vis/Code/TimelineRow.js create mode 100644 remotery/vis/Code/TimelineWindow.js create mode 100644 remotery/vis/Code/TitleWindow.js create mode 100644 remotery/vis/Code/TraceDrop.js create mode 100644 remotery/vis/Code/WebGL.js create mode 100644 remotery/vis/Code/WebGLFont.js create mode 100644 remotery/vis/Code/WebSocketConnection.js create mode 100644 remotery/vis/Styles/Fonts/FiraCode/FiraCode-Regular.ttf create mode 100644 remotery/vis/Styles/Fonts/FiraCode/LICENSE create mode 100644 remotery/vis/Styles/Remotery.css create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Animation.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Bind.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Convert.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Core.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/DOM.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Keyboard.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/LocalStore.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/Mouse.js create mode 100644 remotery/vis/extern/BrowserLib/Core/Code/MurmurHash3.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Button.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/ComboBox.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Container.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/EditBox.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Grid.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Label.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Treeview.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/TreeviewItem.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/Window.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Code/WindowManager.js create mode 100644 remotery/vis/extern/BrowserLib/WindowManager/Styles/WindowManager.css create mode 100644 remotery/vis/index.html diff --git a/remotery/examples/it-works.jai b/remotery/examples/it-works.jai new file mode 100644 index 0000000..36a7b2b --- /dev/null +++ b/remotery/examples/it-works.jai @@ -0,0 +1,50 @@ +main :: () { + r: *rmt.Remotery; + + err := rmt.CreateGlobalInstance(*r); + assert(err == .NONE, "%", err); + defer rmt.DestroyGlobalInstance(r); + + rmt.LogText("start profile"); + for 0..100 { + delay(); + sleep_milliseconds(100); + } + rmt.LogText("end profile"); +} + +delay :: () { + rmt.BeginCPUSample("delay", 0, null); + defer rmt.EndCPUSample(); + + j := 0.0; + for 0..1000 { + j += sin(it.(float32)); + } + + recurse(); + aggregate(); + aggregate(); + aggregate(); +} + +recurse :: (depth := 0) { + rmt.BeginCPUSample("recurse", xx rmt.SampleFlags.Recursive, null); + defer rmt.EndCPUSample(); + + sleep_milliseconds(100); + + if depth < 5 { + recurse(depth + 1); + } +} + +aggregate :: () { + rmt.BeginCPUSample("aggregate", xx rmt.SampleFlags.Aggregate, null); + rmt.EndCPUSample(); +} + +#import "Math"; +#import "Basic"; + +rmt :: #import,file "../module.jai"; diff --git a/remotery/generate.jai 
b/remotery/generate.jai
new file mode 100644
index 0000000..ac0f1b6
--- /dev/null
+++ b/remotery/generate.jai
@@ -0,0 +1,43 @@
+#scope_file;
+
+LOWERCASE_FIELD_NAMES :: true;
+
+#run {
+    set_build_options_dc(.{ do_output = false });
+
+    print("building library\n");
+
+    #if OS == {
+        case .WINDOWS;
+            lib_ext  :: "dll";
+            out_base :: "win";
+        case .MACOS;
+            lib_ext  :: "dylib";
+            out_base :: "mac";
+        case .LINUX;
+            lib_ext  :: "so";
+            out_base :: "linux";
+    }
+
+    out_path := tprint("%/%", out_base, "remotery");
+    assert(build_cpp(out_path, "lib/Remotery.c", type = .DYNAMIC_LIBRARY));
+    assert(build_cpp(out_path, "lib/Remotery.c", type = .STATIC_LIBRARY));
+
+    print("generating bindings\n");
+    opts: Generate_Bindings_Options;
+    opts.add_generator_command = false;
+    opts.generate_library_declarations = false;
+    array_add(*opts.strip_prefixes, "rmt_", "RMT_", "rmt", "RMT", "_rmt", "_RMT");
+    array_add(*opts.extra_clang_arguments, "-x", "c");
+    array_add(*opts.libpaths, out_base);
+    array_add(*opts.libnames, tprint("remotery.%", lib_ext));
+    array_add(*opts.source_files, "./lib/Remotery.h");
+    assert(generate_bindings(opts, "remotery.jai"));
+}
+
+#import "File";
+#import "Basic";
+#import "String";
+#import "Compiler";
+#import "BuildCpp";
+#import "Bindings_Generator";
diff --git a/remotery/lib/Remotery.c b/remotery/lib/Remotery.c
new file mode 100644
index 0000000..b91deb3
--- /dev/null
+++ b/remotery/lib/Remotery.c
@@ -0,0 +1,11107 @@
+//
+// Copyright 2014-2022 Celtoys Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+/*
+@Contents:
+
+    @DEPS: External Dependencies
+    @TIMERS: Platform-specific timers
+    @TLS: Thread-Local Storage
+    @ERROR: Error handling
+    @MUTEX: Mutexes
+    @ATOMIC: Atomic Operations
+    @RNG: Random Number Generator
+    @LFSR: Galois Linear-feedback Shift Register
+    @VMBUFFER: Mirror Buffer using Virtual Memory for auto-wrap
+    @NEW: New/Delete operators with error values for simplifying object create/destroy
+    @SAFEC: Safe C Library excerpts
+    @OSTHREADS: Wrappers around OS-specific thread functions
+    @THREADS: Cross-platform thread object
+    @OBJALLOC: Reusable Object Allocator
+    @DYNBUF: Dynamic Buffer
+    @HASHTABLE: Integer pair hash map for inserts/finds. No removes for added simplicity.
+    @STRINGTABLE: Map from string hash to string offset in local buffer
+    @SOCKETS: Sockets TCP/IP Wrapper
+    @SHA1: SHA-1 Cryptographic Hash Function
+    @BASE64: Base-64 encoder
+    @MURMURHASH: Murmur-Hash 3
+    @WEBSOCKETS: WebSockets
+    @MESSAGEQ: Multiple producer, single consumer message queue
+    @NETWORK: Network Server
+    @SAMPLE: Base Sample Description (CPU by default)
+    @SAMPLETREE: A tree of samples with their allocator
+    @TPROFILER: Thread Profiler data, storing both sampling and instrumentation results
+    @TGATHER: Thread Gatherer, periodically polling for newly created threads
+    @TSAMPLER: Sampling thread contexts
+    @REMOTERY: Remotery
+    @CUDA: CUDA event sampling
+    @D3D11: Direct3D 11 event sampling
+    @D3D12: Direct3D 12 event sampling
+    @OPENGL: OpenGL event sampling
+    @METAL: Metal event sampling
+    @VULKAN: Vulkan event sampling
+    @SAMPLEAPI: Sample API for user callbacks
+    @PROPERTYAPI: Property API for user callbacks
+    @PROPERTIES: Property API
+*/
+
+#define RMT_IMPL
+#include "Remotery.h"
+
+#ifdef RMT_PLATFORM_WINDOWS
+#pragma comment(lib, "ws2_32.lib")
+#pragma comment(lib, "winmm.lib")
+#endif
+
+#if RMT_ENABLED
+
+// Global settings
+static rmtSettings g_Settings;
+static rmtBool g_SettingsInitialized = RMT_FALSE;
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @DEPS: External Dependencies
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+// clang-format off
+
+//
+// Required CRT dependencies
+//
+#if RMT_USE_TINYCRT
+
+    #include <TinyCRT/TinyCRT.h>
+    #include <TinyCRT/TinyWinsock.h>
+    #include <Memory/Memory.h>
+
+    #define CreateFileMapping CreateFileMappingA
+    #define RMT_ENABLE_THREAD_SAMPLER
+
+#else
+
+    #ifdef RMT_PLATFORM_MACOS
+        #include <mach/mach_time.h>
+        #include <mach/vm_map.h>
+        #include <mach/mach.h>
+        #include <sys/time.h>
+    #else
+        #if !defined(__FreeBSD__) && !defined(__OpenBSD__)
+            #include <malloc.h>
+        #endif
+    #endif
+
+    #include <assert.h>
+    #include <stdio.h>
+    #include <time.h>
+    #include <limits.h>
+    #include <math.h>
+    #include <stdlib.h>
+    #include <stdarg.h>
+
+    #ifdef RMT_PLATFORM_WINDOWS
+        #include <winsock2.h>
+        #include <windows.h>
+        #ifndef __MINGW32__
+            #include <intrin.h>
+        #endif
+        #undef min
+        #undef max
+        #include <tlhelp32.h>
+        #include <winnt.h>
+        #include <processthreadsapi.h>
+        typedef long NTSTATUS; // winternl.h
+
+        #ifdef _XBOX_ONE
+            #ifdef _DURANGO
+                #include "xmem.h"
+            #endif
+        #else
+            #define RMT_ENABLE_THREAD_SAMPLER
+        #endif
+
+    #endif
+
+    #ifdef RMT_PLATFORM_LINUX
+        #if defined(__FreeBSD__) || defined(__OpenBSD__)
+            #include <stdlib.h>
+        #else
+            #include <malloc.h>
+        #endif
+    #endif
+
+    #if defined(RMT_PLATFORM_POSIX)
+        #include <stdlib.h>
+        #include <pthread.h>
+        #include <unistd.h>
+        #include <string.h>
+        #include <sys/socket.h>
+        #include <netinet/in.h>
+        #include <fcntl.h>
+        #include <errno.h>
+        #include <dlfcn.h>
+        #include <arpa/inet.h>
+        #include <sys/select.h>
+        #include <sys/mman.h>
+    #endif
+
+    #ifdef __MINGW32__
+        #include <pthread_time.h>
+    #endif
+
+#endif
+
+#if RMT_USE_CUDA
+    #include <cuda.h>
+#endif
+
+#if RMT_USE_LEGACY_ATOMICS==0
+    #if __cplusplus >= 199711L
+        #if !defined(RMT_USE_CPP_ATOMICS)
+            #define RMT_USE_CPP_ATOMICS
+        #endif
+    #elif __STDC_VERSION__ >= 201112L
+        #if !defined(__STDC_NO_ATOMICS__)
+            #if !defined(RMT_USE_C11_ATOMICS)
+                #define RMT_USE_C11_ATOMICS
+            #endif
+        #endif
+    #endif
+#endif
+
+#if defined(RMT_USE_C11_ATOMICS)
+    #include <stdatomic.h>
+#elif defined(RMT_USE_CPP_ATOMICS)
+    #include <atomic>
+#endif
+
+// clang-format on
+
+#if defined(_MSC_VER) && !defined(__clang__)
+    #define RMT_UNREFERENCED_PARAMETER(i) (i)
+#else
+    #define RMT_UNREFERENCED_PARAMETER(i) (void)(1 ? (void)0 : ((void)i))
+#endif
+
+// Executes the given statement and returns from the calling function if it fails, returning the error with it
+#define rmtTry(stmt)                 \
+    {                                \
+        rmtError error = stmt;       \
+        if (error != RMT_ERROR_NONE) \
+            return error;            \
+    }
+
+static rmtU8 minU8(rmtU8 a, rmtU8 b)
+{
+    return a < b ? a : b;
+}
+static rmtU16 maxU16(rmtU16 a, rmtU16 b)
+{
+    return a > b ? a : b;
+}
+static rmtS32 minS32(rmtS32 a, rmtS32 b)
+{
+    return a < b ? a : b;
+}
+static rmtS32 maxS32(rmtS32 a, rmtS32 b)
+{
+    return a > b ? a : b;
+}
+static rmtU32 minU32(rmtU32 a, rmtU32 b)
+{
+    return a < b ? a : b;
+}
+static rmtU32 maxU32(rmtU32 a, rmtU32 b)
+{
+    return a > b ? a : b;
+}
+static rmtS64 maxS64(rmtS64 a, rmtS64 b)
+{
+    return a > b ? a : b;
+}
+
+// Memory management functions
+static void* rmtMalloc(rmtU32 size)
+{
+    return g_Settings.malloc(g_Settings.mm_context, size);
+}
+
+static void* rmtRealloc(void* ptr, rmtU32 size)
+{
+    return g_Settings.realloc(g_Settings.mm_context, ptr, size);
+}
+
+static void rmtFree(void* ptr)
+{
+    g_Settings.free(g_Settings.mm_context, ptr);
+}
+
+// File system functions
+static FILE* rmtOpenFile(const char* filename, const char* mode)
+{
+#if defined(RMT_PLATFORM_WINDOWS) && !RMT_USE_TINYCRT
+    FILE* fp;
+    return fopen_s(&fp, filename, mode) == 0 ? fp : NULL;
+#else
+    return fopen(filename, mode);
+#endif
+}
+
+void rmtCloseFile(FILE* fp)
+{
+    if (fp != NULL)
+    {
+        fclose(fp);
+    }
+}
+
+rmtBool rmtWriteFile(FILE* fp, const void* data, rmtU32 size)
+{
+    assert(fp != NULL);
+    // Write `size` one-byte items so fwrite's item count can be compared against `size`
+    return fwrite(data, 1, size, fp) == size ? RMT_TRUE : RMT_FALSE;
+}
+
+#if RMT_USE_OPENGL
+// DLL/Shared Library functions
+
+static void* rmtLoadLibrary(const char* path)
+{
+#if defined(RMT_PLATFORM_WINDOWS)
+    return (void*)LoadLibraryA(path);
+#elif defined(RMT_PLATFORM_POSIX)
+    return dlopen(path, RTLD_LOCAL | RTLD_LAZY);
+#else
+    return NULL;
+#endif
+}
+
+static void rmtFreeLibrary(void* handle)
+{
+#if defined(RMT_PLATFORM_WINDOWS)
+    FreeLibrary((HMODULE)handle);
+#elif defined(RMT_PLATFORM_POSIX)
+    dlclose(handle);
+#endif
+}
+
+#if defined(RMT_PLATFORM_WINDOWS)
+typedef FARPROC ProcReturnType;
+#else
+typedef void* ProcReturnType;
+#endif
+
+static ProcReturnType rmtGetProcAddress(void* handle, const char* symbol)
+{
+#if defined(RMT_PLATFORM_WINDOWS)
+    return GetProcAddress((HMODULE)handle, (LPCSTR)symbol);
+#elif defined(RMT_PLATFORM_POSIX)
+    return dlsym(handle, symbol);
+#endif
+}
+
+#endif
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @TIMERS: Platform-specific timers
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+//
+// Get millisecond timer value that has only one guarantee: multiple calls are consistently comparable.
+// On some platforms, even though this returns milliseconds, the timer may be far less accurate.
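+// For example, on Windows this is backed by GetTickCount() (see below), whose 32-bit
+// result wraps every 2^32 ms (roughly 49.7 days); interval arithmetic of the form
+// (rmtU32)(now - then) stays correct across a single wrap because the subtraction
+// is unsigned.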
+// +static rmtU32 msTimer_Get() +{ +#ifdef RMT_PLATFORM_WINDOWS + + return (rmtU32)GetTickCount(); + +#else + + clock_t time = clock(); + +// CLOCKS_PER_SEC is 128 on FreeBSD, causing div/0 +#if defined(__FreeBSD__) || defined(__OpenBSD__) + rmtU32 msTime = (rmtU32)(time * 1000 / CLOCKS_PER_SEC); +#else + rmtU32 msTime = (rmtU32)(time / (CLOCKS_PER_SEC / 1000)); +#endif + + return msTime; + +#endif +} + +// +// Micro-second accuracy high performance counter +// +#ifndef RMT_PLATFORM_WINDOWS +typedef rmtU64 LARGE_INTEGER; +#endif +typedef struct +{ + LARGE_INTEGER counter_start; + double counter_scale; +} usTimer; + +static void usTimer_Init(usTimer* timer) +{ +#if defined(RMT_PLATFORM_WINDOWS) + LARGE_INTEGER performance_frequency; + + assert(timer != NULL); + + // Calculate the scale from performance counter to microseconds + QueryPerformanceFrequency(&performance_frequency); + timer->counter_scale = 1000000.0 / performance_frequency.QuadPart; + + // Record the offset for each read of the counter + QueryPerformanceCounter(&timer->counter_start); + +#elif defined(RMT_PLATFORM_MACOS) + + mach_timebase_info_data_t nsScale; + mach_timebase_info(&nsScale); + const double ns_per_us = 1.0e3; + timer->counter_scale = (double)(nsScale.numer) / ((double)nsScale.denom * ns_per_us); + + timer->counter_start = mach_absolute_time(); + +#elif defined(RMT_PLATFORM_LINUX) + + struct timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + timer->counter_start = (rmtU64)(tv.tv_sec * (rmtU64)1000000) + (rmtU64)(tv.tv_nsec * 0.001); + +#endif +} + +#if defined(RMT_PLATFORM_WINDOWS) + #define usTimer_FromRawTicks(timer, ticks) (rmtU64)(((ticks) - (timer)->counter_start.QuadPart) * (timer)->counter_scale) +#elif defined(RMT_PLATFORM_MACOS) + #define usTimer_FromRawTicks(timer, ticks) (rmtU64)(((ticks) - (timer)->counter_start) * (timer)->counter_scale) +#elif defined(RMT_PLATFORM_LINUX) + #define usTimer_FromRawTicks(timer, ticks) (rmtU64)((ticks) - (timer)->counter_start) +#endif + +static rmtU64 usTimer_Get(usTimer* timer) +{ +#if defined(RMT_PLATFORM_WINDOWS) + LARGE_INTEGER performance_count; + + assert(timer != NULL); + + // Read counter and convert to microseconds + QueryPerformanceCounter(&performance_count); + return usTimer_FromRawTicks(timer, performance_count.QuadPart); + +#elif defined(RMT_PLATFORM_MACOS) + + rmtU64 curr_time = mach_absolute_time(); + return usTimer_FromRawTicks(timer, curr_time); + +#elif defined(RMT_PLATFORM_LINUX) + + struct timespec tv; + clock_gettime(CLOCK_REALTIME, &tv); + rmtU64 ticks = (rmtU64)(tv.tv_sec * (rmtU64)1000000) + (rmtU64)(tv.tv_nsec * 0.001); + return usTimer_FromRawTicks(timer, ticks); + +#endif +} + +static void msSleep(rmtU32 time_ms) +{ +#ifdef RMT_PLATFORM_WINDOWS + Sleep(time_ms); +#elif defined(RMT_PLATFORM_POSIX) + usleep(time_ms * 1000); +#endif +} + +static struct tm* TimeDateNow() +{ + time_t time_now = time(NULL); + +#if defined(RMT_PLATFORM_WINDOWS) && !RMT_USE_TINYCRT + // Discard the thread-safety benefit of gmtime_s + static struct tm tm_now; + gmtime_s(&tm_now, &time_now); + return &tm_now; +#else + return gmtime(&time_now); +#endif +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @TLS: Thread-Local Storage +------------------------------------------------------------------------------------------------------------------------ 
+------------------------------------------------------------------------------------------------------------------------ +*/ + +#define TLS_INVALID_HANDLE 0xFFFFFFFF + +#if defined(RMT_PLATFORM_WINDOWS) +typedef rmtU32 rmtTLS; +#else +typedef pthread_key_t rmtTLS; +#endif + +static rmtError tlsAlloc(rmtTLS* handle) +{ + assert(handle != NULL); + +#if defined(RMT_PLATFORM_WINDOWS) + *handle = (rmtTLS)TlsAlloc(); + if (*handle == TLS_OUT_OF_INDEXES) + { + *handle = TLS_INVALID_HANDLE; + return RMT_ERROR_TLS_ALLOC_FAIL; + } +#elif defined(RMT_PLATFORM_POSIX) + if (pthread_key_create(handle, NULL) != 0) + { + *handle = TLS_INVALID_HANDLE; + return RMT_ERROR_TLS_ALLOC_FAIL; + } +#endif + + return RMT_ERROR_NONE; +} + +static void tlsFree(rmtTLS handle) +{ + assert(handle != TLS_INVALID_HANDLE); +#if defined(RMT_PLATFORM_WINDOWS) + TlsFree(handle); +#elif defined(RMT_PLATFORM_POSIX) + pthread_key_delete((pthread_key_t)handle); +#endif +} + +static void tlsSet(rmtTLS handle, void* value) +{ + assert(handle != TLS_INVALID_HANDLE); +#if defined(RMT_PLATFORM_WINDOWS) + TlsSetValue(handle, value); +#elif defined(RMT_PLATFORM_POSIX) + pthread_setspecific((pthread_key_t)handle, value); +#endif +} + +static void* tlsGet(rmtTLS handle) +{ + assert(handle != TLS_INVALID_HANDLE); +#if defined(RMT_PLATFORM_WINDOWS) + return TlsGetValue(handle); +#elif defined(RMT_PLATFORM_POSIX) + return pthread_getspecific((pthread_key_t)handle); +#endif +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @ERROR: Error handling +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// Used to store per-thread error messages +// Static so that we can set error messages from code the Remotery object depends on +static rmtTLS g_lastErrorMessageTlsHandle = TLS_INVALID_HANDLE; +static const rmtU32 g_errorMessageSize = 1024; + +static rmtError rmtMakeError(rmtError in_error, rmtPStr error_message) +{ + char* thread_message_ptr; + rmtU32 error_len; + + // Allocate the TLS on-demand + // TODO(don): Make this thread-safe + if (g_lastErrorMessageTlsHandle == TLS_INVALID_HANDLE) + { + rmtTry(tlsAlloc(&g_lastErrorMessageTlsHandle)); + } + + // Allocate the string storage for the error message on-demand + thread_message_ptr = (char*)tlsGet(g_lastErrorMessageTlsHandle); + if (thread_message_ptr == NULL) + { + thread_message_ptr = (char*)rmtMalloc(g_errorMessageSize); + if (thread_message_ptr == NULL) + { + return RMT_ERROR_MALLOC_FAIL; + } + + tlsSet(g_lastErrorMessageTlsHandle, (void*)thread_message_ptr); + } + + // Safe copy of the error text without going via strcpy_s down below + error_len = (rmtU32)strlen(error_message); + error_len = error_len >= g_errorMessageSize ? 
g_errorMessageSize - 1 : error_len; + memcpy(thread_message_ptr, error_message, error_len); + thread_message_ptr[error_len] = 0; + + return in_error; +} + +RMT_API rmtPStr rmt_GetLastErrorMessage() +{ + rmtPStr thread_message_ptr; + + // No message to specify if `rmtMakeError` failed or one hasn't been set yet + if (g_lastErrorMessageTlsHandle == TLS_INVALID_HANDLE) + { + return "No error message"; + } + thread_message_ptr = (rmtPStr)tlsGet(g_lastErrorMessageTlsHandle); + if (thread_message_ptr == NULL) + { + return "No error message"; + } + + return thread_message_ptr; +} + + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @MUTEX: Mutexes +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#ifdef RMT_PLATFORM_WINDOWS +typedef CRITICAL_SECTION rmtMutex; +#else +typedef pthread_mutex_t rmtMutex; +#endif + +static void mtxInit(rmtMutex* mutex) +{ + assert(mutex != NULL); +#if defined(RMT_PLATFORM_WINDOWS) + InitializeCriticalSection(mutex); +#elif defined(RMT_PLATFORM_POSIX) + pthread_mutex_init(mutex, NULL); +#endif +} + +static void mtxLock(rmtMutex* mutex) +{ + assert(mutex != NULL); +#if defined(RMT_PLATFORM_WINDOWS) + EnterCriticalSection(mutex); +#elif defined(RMT_PLATFORM_POSIX) + pthread_mutex_lock(mutex); +#endif +} + +static void mtxUnlock(rmtMutex* mutex) +{ + assert(mutex != NULL); +#if defined(RMT_PLATFORM_WINDOWS) + LeaveCriticalSection(mutex); +#elif defined(RMT_PLATFORM_POSIX) + pthread_mutex_unlock(mutex); +#endif +} + +static void mtxDelete(rmtMutex* mutex) +{ + assert(mutex != NULL); +#if defined(RMT_PLATFORM_WINDOWS) + DeleteCriticalSection(mutex); +#elif defined(RMT_PLATFORM_POSIX) + pthread_mutex_destroy(mutex); +#endif +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @ATOMIC: Atomic Operations +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// TODO(don): The CAS loops possible with this API are suboptimal. For example, AtomicCompareAndSwapU32 discards the +// return value which tells you the current (potentially mismatching) value of the location you want to modify. This +// means the CAS loop has to explicitly re-load this location on each modify attempt. Instead, the return value should +// be used to update the old value and an initial load only made once before the loop starts. 
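+//
+// As an illustration only (this helper does not exist in the codebase), a CAS
+// primitive that returned the observed value would allow loops of this shape,
+// with a single load before the loop instead of one per attempt:
+//
+//     rmtU32 old_val = AtomicLoadU32(val);              // one initial load
+//     for (;;)
+//     {
+//         rmtU32 new_val = old_val + 1;                 // compute the update
+//         rmtU32 seen = AtomicCompareAndSwapRetU32(val, old_val, new_val);
+//         if (seen == old_val)
+//             break;                                    // swap succeeded
+//         old_val = seen;                               // reuse observed value, no re-load
+//     }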
+ +// TODO(don): Vary these types across versions of C and C++ +#if defined(RMT_USE_C11_ATOMICS) + typedef _Atomic(rmtS32) rmtAtomicS32; + typedef _Atomic(rmtU32) rmtAtomicU32; + typedef _Atomic(rmtU64) rmtAtomicU64; + typedef _Atomic(rmtBool) rmtAtomicBool; + #define rmtAtomicPtr(type) _Atomic(type *) +#elif defined(RMT_USE_CPP_ATOMICS) + typedef std::atomic< rmtS32 > rmtAtomicS32; + typedef std::atomic< rmtU32 > rmtAtomicU32; + typedef std::atomic< rmtU64 > rmtAtomicU64; + typedef std::atomic< rmtBool > rmtAtomicBool; + #define rmtAtomicPtr(type) std::atomic< type * > +#else + typedef volatile rmtS32 rmtAtomicS32; + typedef volatile rmtU32 rmtAtomicU32; + typedef volatile rmtU64 rmtAtomicU64; + typedef volatile rmtBool rmtAtomicBool; + #define rmtAtomicPtr(type) volatile type* +#endif + +typedef rmtAtomicPtr(void) rmtAtomicVoidPtr; + +static rmtBool AtomicCompareAndSwapU32(rmtAtomicU32 volatile* val, rmtU32 old_val, rmtU32 new_val) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_compare_exchange_strong(val, &old_val, new_val); +#elif defined(RMT_USE_CPP_ATOMICS) + return val->compare_exchange_strong(old_val, new_val); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return _InterlockedCompareExchange((long volatile*)val, new_val, old_val) == old_val ? RMT_TRUE : RMT_FALSE; +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return __sync_bool_compare_and_swap(val, old_val, new_val) ? RMT_TRUE : RMT_FALSE; +#endif +} + + +static rmtBool AtomicCompareAndSwapU64(rmtAtomicU64 volatile* val, rmtU64 old_val, rmtU64 new_val) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_compare_exchange_strong(val, &old_val, new_val); +#elif defined(RMT_USE_CPP_ATOMICS) + return val->compare_exchange_strong(old_val, new_val); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return _InterlockedCompareExchange64((volatile LONG64*)val, (LONG64)new_val, (LONG64)old_val) == (LONG64)old_val + ? RMT_TRUE + : RMT_FALSE; +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return __sync_bool_compare_and_swap(val, old_val, new_val) ? RMT_TRUE : RMT_FALSE; +#endif +} + +static rmtBool AtomicCompareAndSwapPointer(rmtAtomicVoidPtr volatile* ptr, void* old_ptr, void* new_ptr) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_compare_exchange_strong(ptr, &old_ptr, new_ptr); +#elif defined(RMT_USE_CPP_ATOMICS) + return ptr->compare_exchange_strong(old_ptr, new_ptr); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) +#ifdef _WIN64 + return _InterlockedCompareExchange64((__int64 volatile*)ptr, (__int64)new_ptr, (__int64)old_ptr) == (__int64)old_ptr + ? RMT_TRUE + : RMT_FALSE; +#else + return _InterlockedCompareExchange((long volatile*)ptr, (long)new_ptr, (long)old_ptr) == (long)old_ptr ? RMT_TRUE + : RMT_FALSE; +#endif +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return __sync_bool_compare_and_swap(ptr, old_ptr, new_ptr) ? 
RMT_TRUE : RMT_FALSE; +#endif +} + +// +// NOTE: Does not guarantee a memory barrier +// TODO: Make sure all platforms don't insert a memory barrier as this is only for stats +// Alternatively, add strong/weak memory order equivalents +// +static rmtS32 AtomicAddS32(rmtAtomicS32* value, rmtS32 add) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_fetch_add(value, add); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->fetch_add(add); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return _InterlockedExchangeAdd((long volatile*)value, (long)add); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return __sync_fetch_and_add(value, add); +#endif +} + +static rmtU32 AtomicAddU32(rmtAtomicU32* value, rmtU32 add) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_fetch_add(value, add); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->fetch_add(add); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU32)_InterlockedExchangeAdd((long volatile*)value, (long)add); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return (rmtU32)__sync_fetch_and_add(value, add); +#endif +} + +static rmtU64 AtomicAddU64(rmtAtomicU64* value, rmtU64 add) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_fetch_add(value, add); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->fetch_add(add); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU64)_InterlockedExchangeAdd64((long long volatile*)value, (long long)add); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return (rmtU64)__sync_fetch_and_add(value, add); +#endif +} + +static void AtomicSubS32(rmtAtomicS32* value, rmtS32 sub) +{ + // Not all platforms have an implementation so just negate and add + AtomicAddS32(value, -sub); +} + +static rmtU32 AtomicStoreU32(rmtAtomicU32* value, rmtU32 set) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_exchange(value, set); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->exchange(set); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU32)_InterlockedExchange((long volatile*)value, (long) set); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return (rmtU32)__sync_lock_test_and_set(value, set); +#endif +} + +static rmtU64 AtomicStoreU64(rmtAtomicU64* value, rmtU64 set) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_exchange(value, set); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->exchange(set); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU64)_InterlockedExchange64((long long volatile*)value, (long long)set); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return (rmtU64)__sync_lock_test_and_set(value, set); +#endif +} + +static rmtU32 AtomicLoadU32(rmtAtomicU32* value) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_load(value); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->load(); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU32)_InterlockedExchangeAdd((long volatile*)value, (long)0); +#elif defined(RMT_PLATFORM_POSIX) || defined(__MINGW32__) + return (rmtU32)__sync_fetch_and_add(value, 0); +#endif +} + +static rmtU64 AtomicLoadU64(rmtAtomicU64* value) +{ +#if defined(RMT_USE_C11_ATOMICS) + return atomic_load(value); +#elif defined(RMT_USE_CPP_ATOMICS) + return value->load(); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + return (rmtU64)_InterlockedExchangeAdd64((long long volatile*)value, (long long)0); +#elif defined(RMT_PLATFORM_POSIX) || 
defined(__MINGW32__) + return (rmtU64)__sync_fetch_and_add(value, 0); +#endif +} + +static void CompilerWriteFence() +{ +#if defined(__clang__) + __asm__ volatile("" : : : "memory"); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + _WriteBarrier(); +#else + asm volatile("" : : : "memory"); +#endif +} + +static void CompilerReadFence() +{ +#if defined(__clang__) + __asm__ volatile("" : : : "memory"); +#elif defined(RMT_PLATFORM_WINDOWS) && !defined(__MINGW32__) + _ReadBarrier(); +#else + asm volatile("" : : : "memory"); +#endif +} + +static rmtU32 LoadAcquire(rmtAtomicU32* address) +{ + rmtU32 value = *address; + CompilerReadFence(); + return value; +} + +static rmtU64 LoadAcquire64(rmtAtomicU64* address) +{ + rmtU64 value = *address; + CompilerReadFence(); + return value; +} + +static long* LoadAcquirePointer(long* volatile* ptr) +{ + long* value = *ptr; + CompilerReadFence(); + return value; +} + +static void StoreRelease(rmtAtomicU32* address, rmtU32 value) +{ + CompilerWriteFence(); + *address = value; +} + +static void StoreRelease64(rmtAtomicU64* address, rmtU64 value) +{ + CompilerWriteFence(); + *address = value; +} + +static void StoreReleasePointer(long* volatile* ptr, long* value) +{ + CompilerWriteFence(); + *ptr = value; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @RNG: Random Number Generator +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// +// WELL: Well Equidistributed Long-period Linear +// These algorithms produce numbers with better equidistribution than MT19937 and improve upon "bit-mixing" properties. They are +// fast, come in many sizes, and produce higher quality random numbers. +// +// This implementation has a period of 2^512, or 10^154. +// +// Implementation from: Game Programming Gems 7, Random Number Generation Chris Lomont +// Documentation: http://www.lomont.org/Math/Papers/2008/Lomont_PRNG_2008.pdf +// + +// Global RNG state for now +// Far better than interfering with the user's rand() +#define Well512_StateSize 16 +static rmtU32 Well512_State[Well512_StateSize]; +static rmtU32 Well512_Index; + +static void Well512_Init(rmtU32 seed) +{ + rmtU32 i; + + // Generate initial state from seed + Well512_State[0] = seed; + for (i = 1; i < Well512_StateSize; i++) + { + rmtU32 prev = Well512_State[i - 1]; + Well512_State[i] = (1812433253 * (prev ^ (prev >> 30)) + i); + } + Well512_Index = 0; +} + +static rmtU32 Well512_RandomU32() +{ + rmtU32 a, b, c, d; + + a = Well512_State[Well512_Index]; + c = Well512_State[(Well512_Index + 13) & 15]; + b = a ^ c ^ (a << 16) ^ (c << 15); + c = Well512_State[(Well512_Index + 9) & 15]; + c ^= (c >> 11); + a = Well512_State[Well512_Index] = b ^ c; + d = a ^ ((a << 5) & 0xDA442D24UL); + Well512_Index = (Well512_Index + 15) & 15; + a = Well512_State[Well512_Index]; + Well512_State[Well512_Index] = a ^ b ^ d ^ (a << 2) ^ (b << 18) ^ (c << 28); + return Well512_State[Well512_Index]; +} + +static rmtU32 Well512_RandomOpenLimit(rmtU32 limit) +{ + // Using % to modulo with range is just masking out the higher bits, leaving a result that's objectively biased. 
+ // Dividing by RAND_MAX is better but leads to increased repetition at low ranges due to very large bucket sizes. + // Instead use multiple passes with smaller bucket sizes, rejecting results that don't fit into this smaller range. + rmtU32 bucket_size = UINT_MAX / limit; + rmtU32 bucket_limit = bucket_size * limit; + rmtU32 r; + do + { + r = Well512_RandomU32(); + } while(r >= bucket_limit); + + return r / bucket_size; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @LFSR: Galois Linear-feedback Shift Register +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +static rmtU32 Log2i(rmtU32 x) +{ + static const rmtU8 MultiplyDeBruijnBitPosition[32] = + { + 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 + }; + + // First round down to one less than a power of two + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return MultiplyDeBruijnBitPosition[(rmtU32)(x * 0x07C4ACDDU) >> 27]; +} + +static rmtU32 GaloisLFSRMask(rmtU32 table_size_log2) +{ + // Taps for 4 to 8 bit ranges + static const rmtU8 XORMasks[] = + { + ((1 << 0) | (1 << 1)), // 2 + ((1 << 1) | (1 << 2)), // 3 + ((1 << 2) | (1 << 3)), // 4 + ((1 << 2) | (1 << 4)), // 5 + ((1 << 4) | (1 << 5)), // 6 + ((1 << 5) | (1 << 6)), // 7 + ((1 << 3) | (1 << 4) | (1 << 5) | (1 << 7)), // 8 + }; + + // Map table size to required XOR mask + assert(table_size_log2 >= 2); + assert(table_size_log2 <= 8); + return XORMasks[table_size_log2 - 2]; +} + +static rmtU32 GaloisLFSRNext(rmtU32 value, rmtU32 xor_mask) +{ + // Output bit + rmtU32 lsb = value & 1; + + // Apply the register shift + value >>= 1; + + // Apply toggle mask if the output bit is set + if (lsb != 0) + { + value ^= xor_mask; + } + + return value; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @NEW: New/Delete operators with error values for simplifying object create/destroy +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#define rmtTryMalloc(type, obj) \ + obj = (type*)rmtMalloc(sizeof(type)); \ + if (obj == NULL) \ + { \ + return RMT_ERROR_MALLOC_FAIL; \ + } + +#define rmtTryMallocArray(type, obj, count) \ + obj = (type*)rmtMalloc((count) * sizeof(type)); \ + if (obj == NULL) \ + { \ + return RMT_ERROR_MALLOC_FAIL; \ + } + +// Ensures the pointer is non-NULL, calls the destructor, frees memory and sets the pointer to NULL +#define rmtDelete(type, obj) \ + if (obj != NULL) \ + { \ + type##_Destructor(obj); \ + rmtFree(obj); \ + obj = NULL; \ + } + +// New will allocate enough space for the object and call the constructor +// If allocation fails the constructor won't be called +// If the constructor fails, the destructor is called and memory is released +// NOTE: Use of sizeof() 
requires that the type be defined at the point of call
+// This is a disadvantage over requiring only a custom Create function
+#define rmtTryNew(type, obj, ...)                                 \
+    {                                                             \
+        obj = (type*)rmtMalloc(sizeof(type));                     \
+        if (obj == NULL)                                          \
+        {                                                         \
+            return RMT_ERROR_MALLOC_FAIL;                         \
+        }                                                         \
+        rmtError error = type##_Constructor(obj, ##__VA_ARGS__);  \
+        if (error != RMT_ERROR_NONE)                              \
+        {                                                         \
+            rmtDelete(type, obj);                                 \
+            return error;                                         \
+        }                                                         \
+    }
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @VMBUFFER: Mirror Buffer using Virtual Memory for auto-wrap
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+typedef struct VirtualMirrorBuffer
+{
+    // Page-rounded size of the buffer without mirroring
+    rmtU32 size;
+
+    // Pointer to the first part of the mirror
+    // The second part comes directly after at ptr+size bytes
+    rmtU8* ptr;
+
+#ifdef RMT_PLATFORM_WINDOWS
+#ifdef _DURANGO
+    size_t page_count;
+    size_t* page_mapping;
+#else
+    HANDLE file_map_handle;
+#endif
+#endif
+
+} VirtualMirrorBuffer;
+
+#ifdef __ANDROID__
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/ashmem.h>
+#define ASHMEM_DEVICE "/dev/ashmem"
+
+/*
+ * ashmem_create_region - creates a new ashmem region and returns the file
+ * descriptor, or <0 on error
+ *
+ * `name' is an optional label to give the region (visible in /proc/pid/maps)
+ * `size' is the size of the region, in page-aligned bytes
+ */
+static int ashmem_dev_create_region(const char* name, size_t size)
+{
+    int fd, ret;
+
+    fd = open(ASHMEM_DEVICE, O_RDWR);
+    if (fd < 0)
+        return fd;
+
+    if (name)
+    {
+        char buf[ASHMEM_NAME_LEN] = {0};
+
+        strncpy(buf, name, sizeof(buf));
+        buf[sizeof(buf) - 1] = 0;
+        ret = ioctl(fd, ASHMEM_SET_NAME, buf);
+        if (ret < 0)
+            goto error;
+    }
+
+    ret = ioctl(fd, ASHMEM_SET_SIZE, size);
+    if (ret < 0)
+        goto error;
+
+    return fd;
+
+error:
+    close(fd);
+    return ret;
+}
+
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +// https://chromium.googlesource.com/chromium/src/+/ad02487d87120bd66045960ffafe0fc27600af50/third_party/ashmem/ashmem-dev.c#181 + +// Starting with API level 26, the following functions from +// libandroid.so should be used to create shared memory regions. +typedef int(*ASharedMemory_createFunc)(const char*, size_t); +typedef size_t(*ASharedMemory_getSizeFunc)(int fd); +typedef int(*ASharedMemory_setProtFunc)(int fd, int prot); + +typedef struct { + ASharedMemory_createFunc create; + ASharedMemory_getSizeFunc getSize; + ASharedMemory_setProtFunc setProt; +} ASharedMemoryFuncs; + +static void* s_LibAndroid = 0; +static pthread_once_t s_ashmem_funcs_once = PTHREAD_ONCE_INIT; +static ASharedMemoryFuncs s_ashmem_funcs = {}; + +static void ashmem_init_funcs() { + ASharedMemoryFuncs* funcs = &s_ashmem_funcs; + if (android_get_device_api_level() >= __ANDROID_API_O__) { + // Leaked intentionally! + s_LibAndroid = dlopen("libandroid.so", RTLD_NOW); + funcs->create = (ASharedMemory_createFunc)dlsym(s_LibAndroid, "ASharedMemory_create"); + } else { + funcs->create = &ashmem_dev_create_region; + } +} + +static const ASharedMemoryFuncs* ashmem_get_funcs() { + pthread_once(&s_ashmem_funcs_once, ashmem_init_funcs); + return &s_ashmem_funcs; +} + +static int ashmem_create_region(const char* name, size_t size) { + return ashmem_get_funcs()->create(name, size); +} + +#endif // __ANDROID__ + +static rmtError VirtualMirrorBuffer_Constructor(VirtualMirrorBuffer* buffer, rmtU32 size, int nb_attempts) +{ + static const rmtU32 k_64 = 64 * 1024; + RMT_UNREFERENCED_PARAMETER(nb_attempts); + +#ifdef RMT_PLATFORM_LINUX +#if defined(__FreeBSD__) || defined(__OpenBSD__) + char path[] = "/tmp/ring-buffer-XXXXXX"; +#else + char path[] = "/dev/shm/ring-buffer-XXXXXX"; +#endif + int file_descriptor; +#endif + + // Round up to page-granulation; the nearest 64k boundary for now + size = (size + k_64 - 1) / k_64 * k_64; + + // Set defaults + buffer->size = size; + buffer->ptr = NULL; +#ifdef RMT_PLATFORM_WINDOWS +#ifdef _DURANGO + buffer->page_count = 0; + buffer->page_mapping = NULL; +#else + buffer->file_map_handle = INVALID_HANDLE_VALUE; +#endif +#endif + +#ifdef RMT_PLATFORM_WINDOWS +#ifdef _DURANGO + + // Xbox version based on Windows version and XDK reference + + buffer->page_count = size / k_64; + if (buffer->page_mapping) + { + free(buffer->page_mapping); + } + buffer->page_mapping = (size_t*)malloc(sizeof(ULONG) * buffer->page_count); + + while (nb_attempts-- > 0) + { + rmtU8* desired_addr; + + // Create a page mapping for pointing to its physical address with multiple virtual pages + if (!AllocateTitlePhysicalPages(GetCurrentProcess(), MEM_LARGE_PAGES, &buffer->page_count, + buffer->page_mapping)) + { + free(buffer->page_mapping); + buffer->page_mapping = NULL; + break; + } + + // Reserve two contiguous pages of virtual memory + desired_addr = (rmtU8*)VirtualAlloc(0, size * 2, MEM_RESERVE, PAGE_NOACCESS); + if (desired_addr == NULL) + break; + + // Release the range immediately but retain the address for the next sequence of code to + // try and map to it. In the mean-time some other OS thread may come along and allocate this + // address range from underneath us so multiple attempts need to be made. 
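+            // Why this dance is worth it: once both views are mapped, ptr[i] and
+            // ptr[i + size] alias the same physical byte for any i in [0, size),
+            // so reads and writes that run past the end of the first view
+            // continue seamlessly in the second, with no explicit wrap handling
+            // needed by the ring buffer code.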
+ VirtualFree(desired_addr, 0, MEM_RELEASE); + + // Immediately try to point both pages at the file mapping + if (MapTitlePhysicalPages(desired_addr, buffer->page_count, MEM_LARGE_PAGES, PAGE_READWRITE, + buffer->page_mapping) == desired_addr && + MapTitlePhysicalPages(desired_addr + size, buffer->page_count, MEM_LARGE_PAGES, PAGE_READWRITE, + buffer->page_mapping) == desired_addr + size) + { + buffer->ptr = desired_addr; + break; + } + + // Failed to map the virtual pages; cleanup and try again + FreeTitlePhysicalPages(GetCurrentProcess(), buffer->page_count, buffer->page_mapping); + buffer->page_mapping = NULL; + } + +#else + + // Windows version based on https://gist.github.com/rygorous/3158316 + + while (nb_attempts-- > 0) + { + rmtU8* desired_addr; + + // Create a file mapping for pointing to its physical address with multiple virtual pages + buffer->file_map_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, size, 0); + if (buffer->file_map_handle == NULL) + break; + +#ifndef _UWP // NON-UWP Windows Desktop Version + + // Reserve two contiguous pages of virtual memory + desired_addr = (rmtU8*)VirtualAlloc(0, size * 2, MEM_RESERVE, PAGE_NOACCESS); + if (desired_addr == NULL) + break; + + // Release the range immediately but retain the address for the next sequence of code to + // try and map to it. In the mean-time some other OS thread may come along and allocate this + // address range from underneath us so multiple attempts need to be made. + VirtualFree(desired_addr, 0, MEM_RELEASE); + + // Immediately try to point both pages at the file mapping + if (MapViewOfFileEx(buffer->file_map_handle, FILE_MAP_ALL_ACCESS, 0, 0, size, desired_addr) == desired_addr && + MapViewOfFileEx(buffer->file_map_handle, FILE_MAP_ALL_ACCESS, 0, 0, size, desired_addr + size) == + desired_addr + size) + { + buffer->ptr = desired_addr; + break; + } + +#else // UWP + + // Implementation based on example from: + // https://docs.microsoft.com/en-us/windows/desktop/api/memoryapi/nf-memoryapi-virtualalloc2 + // + // Notes + // - just replaced the non-uwp functions by the uwp variants. + // - Both versions could be rewritten to not need the try-loop, see the example mentioned above. I just keep it + // as is for now. + // - Successfully tested on Hololens + desired_addr = (rmtU8*)VirtualAlloc2FromApp(NULL, NULL, 2 * size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, + PAGE_NOACCESS, NULL, 0); + + // Split the placeholder region into two regions of equal size. + VirtualFree(desired_addr, size, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER); + + // Immediately try to point both pages at the file mapping. + if (MapViewOfFile3FromApp(buffer->file_map_handle, NULL, desired_addr, 0, size, MEM_REPLACE_PLACEHOLDER, + PAGE_READWRITE, NULL, 0) == desired_addr && + MapViewOfFile3FromApp(buffer->file_map_handle, NULL, desired_addr + size, 0, size, MEM_REPLACE_PLACEHOLDER, + PAGE_READWRITE, NULL, 0) == desired_addr + size) + { + buffer->ptr = desired_addr; + break; + } +#endif + // Failed to map the virtual pages; cleanup and try again + CloseHandle(buffer->file_map_handle); + buffer->file_map_handle = NULL; + } + +#endif // _XBOX_ONE + +#endif + +#ifdef RMT_PLATFORM_MACOS + + // + // Mac version based on https://github.com/mikeash/MAMirroredQueue + // + // Copyright (c) 2010, Michael Ash + // All rights reserved. 
+ // + // Redistribution and use in source and binary forms, with or without modification, are permitted provided that + // the following conditions are met: + // + // Redistributions of source code must retain the above copyright notice, this list of conditions and the following + // disclaimer. + // + // Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the + // following disclaimer in the documentation and/or other materials provided with the distribution. + // Neither the name of Michael Ash nor the names of its contributors may be used to endorse or promote products + // derived from this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + // IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + + while (nb_attempts-- > 0) + { + vm_prot_t cur_prot, max_prot; + kern_return_t mach_error; + rmtU8* ptr = NULL; + rmtU8* target = NULL; + + // Allocate 2 contiguous pages of virtual memory + if (vm_allocate(mach_task_self(), (vm_address_t*)&ptr, size * 2, VM_FLAGS_ANYWHERE) != KERN_SUCCESS) + break; + + // Try to deallocate the last page, leaving its virtual memory address free + target = ptr + size; + if (vm_deallocate(mach_task_self(), (vm_address_t)target, size) != KERN_SUCCESS) + { + vm_deallocate(mach_task_self(), (vm_address_t)ptr, size * 2); + break; + } + + // Attempt to remap the page just deallocated to the buffer again + mach_error = vm_remap(mach_task_self(), (vm_address_t*)&target, size, + 0, // mask + 0, // anywhere + mach_task_self(), (vm_address_t)ptr, + 0, // copy + &cur_prot, &max_prot, VM_INHERIT_COPY); + + if (mach_error == KERN_NO_SPACE) + { + // Failed on this pass, cleanup and make another attempt + if (vm_deallocate(mach_task_self(), (vm_address_t)ptr, size) != KERN_SUCCESS) + break; + } + + else if (mach_error == KERN_SUCCESS) + { + // Leave the loop on success + buffer->ptr = ptr; + break; + } + + else + { + // Unknown error, can't recover + vm_deallocate(mach_task_self(), (vm_address_t)ptr, size); + break; + } + } + +#endif + +#ifdef RMT_PLATFORM_LINUX + + // Linux version based on now-defunct Wikipedia section + // http://en.wikipedia.org/w/index.php?title=Circular_buffer&oldid=600431497 + +#ifdef __ANDROID__ + file_descriptor = ashmem_create_region("remotery_shm", size * 2); + if (file_descriptor < 0) + { + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + } +#else + // Create a unique temporary filename in the shared memory folder + file_descriptor = mkstemp(path); + if (file_descriptor < 0) + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + + // Delete the name + if (unlink(path)) + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + + // Set the file size to twice the buffer size + // TODO: this 2x behaviour can be avoided with similar solution to Win/Mac + if (ftruncate(file_descriptor, 
size * 2)) + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + +#endif + // Map 2 contiguous pages + buffer->ptr = (rmtU8*)mmap(NULL, size * 2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (buffer->ptr == MAP_FAILED) + { + buffer->ptr = NULL; + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + } + + // Point both pages to the same memory file + if (mmap(buffer->ptr, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, file_descriptor, 0) != buffer->ptr || + mmap(buffer->ptr + size, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, file_descriptor, 0) != + buffer->ptr + size) + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + +#endif + + // Cleanup if exceeded number of attempts or failed + if (buffer->ptr == NULL) + return RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL; + + return RMT_ERROR_NONE; +} + +static void VirtualMirrorBuffer_Destructor(VirtualMirrorBuffer* buffer) +{ + assert(buffer != 0); + +#ifdef RMT_PLATFORM_WINDOWS +#ifdef _DURANGO + if (buffer->page_mapping != NULL) + { + VirtualFree(buffer->ptr, 0, MEM_DECOMMIT); // needed in conjunction with FreeTitlePhysicalPages + FreeTitlePhysicalPages(GetCurrentProcess(), buffer->page_count, buffer->page_mapping); + free(buffer->page_mapping); + buffer->page_mapping = NULL; + } +#else + if (buffer->file_map_handle != NULL) + { + // FIXME, don't we need to unmap the file views obtained in VirtualMirrorBuffer_Constructor, both for + // uwp/non-uwp See example + // https://docs.microsoft.com/en-us/windows/desktop/api/memoryapi/nf-memoryapi-virtualalloc2 + + CloseHandle(buffer->file_map_handle); + buffer->file_map_handle = NULL; + } +#endif +#endif + +#ifdef RMT_PLATFORM_MACOS + if (buffer->ptr != NULL) + vm_deallocate(mach_task_self(), (vm_address_t)buffer->ptr, buffer->size * 2); +#endif + +#ifdef RMT_PLATFORM_LINUX + if (buffer->ptr != NULL) + munmap(buffer->ptr, buffer->size * 2); +#endif + + buffer->ptr = NULL; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @SAFEC: Safe C Library excerpts + http://sourceforge.net/projects/safeclib/ +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +/*------------------------------------------------------------------ + * + * November 2008, Bo Berry + * + * Copyright (c) 2008-2011 by Cisco Systems, Inc + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *------------------------------------------------------------------
+ */
+
+// NOTE: Microsoft also has its own version of these functions so I do some hacky PP to remove them
+#define strnlen_s strnlen_s_safe_c
+#define strncat_s strncat_s_safe_c
+#define strcpy_s strcpy_s_safe_c
+
+#define RSIZE_MAX_STR (4UL << 10) /* 4KB */
+#define RCNEGATE(x) x
+
+#define EOK (0)
+#define ESNULLP (400) /* null ptr */
+#define ESZEROL (401) /* length is zero */
+#define ESLEMAX (403) /* length exceeds max */
+#define ESOVRLP (404) /* overlap undefined */
+#define ESNOSPC (406) /* not enough space for s2 */
+#define ESUNTERM (407) /* unterminated string */
+#define ESNOTFND (409) /* not found */
+
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+// rsize_t equivalent without going to the hassle of detecting if a platform has implemented C11/K3.2
+typedef unsigned int r_size_t;
+
+static r_size_t strnlen_s(const char* dest, r_size_t dmax)
+{
+    r_size_t count;
+
+    if (dest == NULL)
+    {
+        return RCNEGATE(0);
+    }
+
+    if (dmax == 0)
+    {
+        return RCNEGATE(0);
+    }
+
+    if (dmax > RSIZE_MAX_STR)
+    {
+        return RCNEGATE(0);
+    }
+
+    count = 0;
+    while (*dest && dmax)
+    {
+        count++;
+        dmax--;
+        dest++;
+    }
+
+    return RCNEGATE(count);
+}
+
+static errno_t strstr_s(char* dest, r_size_t dmax, const char* src, r_size_t slen, char** substring)
+{
+    r_size_t len;
+    r_size_t dlen;
+    int i;
+
+    if (substring == NULL)
+    {
+        return RCNEGATE(ESNULLP);
+    }
+    *substring = NULL;
+
+    if (dest == NULL)
+    {
+        return RCNEGATE(ESNULLP);
+    }
+
+    if (dmax == 0)
+    {
+        return RCNEGATE(ESZEROL);
+    }
+
+    if (dmax > RSIZE_MAX_STR)
+    {
+        return RCNEGATE(ESLEMAX);
+    }
+
+    if (src == NULL)
+    {
+        return RCNEGATE(ESNULLP);
+    }
+
+    if (slen == 0)
+    {
+        return RCNEGATE(ESZEROL);
+    }
+
+    if (slen > RSIZE_MAX_STR)
+    {
+        return RCNEGATE(ESLEMAX);
+    }
+
+    /*
+     * src points to a string with zero length, or
+     * src equals dest, return dest
+     */
+    if (*src == '\0' || dest == src)
+    {
+        *substring = dest;
+        return RCNEGATE(EOK);
+    }
+
+    while (*dest && dmax)
+    {
+        i = 0;
+        len = slen;
+        dlen = dmax;
+
+        while (src[i] && dlen)
+        {
+
+            /* not a match, not a substring */
+            if (dest[i] != src[i])
+            {
+                break;
+            }
+
+            /* move to the next char */
+            i++;
+            len--;
+            dlen--;
+
+            if (src[i] == '\0' || !len)
+            {
+                *substring = dest;
+                return RCNEGATE(EOK);
+            }
+        }
+        dest++;
+        dmax--;
+    }
+
+    /*
+     * substring was not found, return NULL
+     */
+    *substring = NULL;
+    return RCNEGATE(ESNOTFND);
+}
+
+static errno_t strncat_s(char* dest, r_size_t dmax, const char* src, r_size_t slen)
+{
+    const char* overlap_bumper;
+
+    if (dest == NULL)
+    {
+        return RCNEGATE(ESNULLP);
+    }
+
+    if (src == NULL)
+    {
+        return RCNEGATE(ESNULLP);
+    }
+
+    if (slen > RSIZE_MAX_STR)
+    {
+        return RCNEGATE(ESLEMAX);
+    }
+
+    if (dmax == 0)
+    {
+        return RCNEGATE(ESZEROL);
+    }
+
+    if (dmax > RSIZE_MAX_STR)
+    {
+        return RCNEGATE(ESLEMAX);
+    }
+
+    /* hold base of dest in case src was not copied */
+
+    if (dest < src)
+    {
+        overlap_bumper = src;
+
+        /* Find the end of dest */
+        while (*dest != '\0')
+        {
+
+            if (dest == overlap_bumper)
+            {
+                return RCNEGATE(ESOVRLP);
+            }
+
+            dest++;
+            dmax--;
+            if (dmax == 0)
+            {
+                return RCNEGATE(ESUNTERM);
+            }
+        }
+
+        while (dmax > 0)
+        {
+            if (dest ==
overlap_bumper) + { + return RCNEGATE(ESOVRLP); + } + + /* + * Copying truncated before the source null is encountered + */ + if (slen == 0) + { + *dest = '\0'; + return RCNEGATE(EOK); + } + + *dest = *src; + if (*dest == '\0') + { + return RCNEGATE(EOK); + } + + dmax--; + slen--; + dest++; + src++; + } + } + else + { + overlap_bumper = dest; + + /* Find the end of dest */ + while (*dest != '\0') + { + + /* + * NOTE: no need to check for overlap here since src comes first + * in memory and we're not incrementing src here. + */ + dest++; + dmax--; + if (dmax == 0) + { + return RCNEGATE(ESUNTERM); + } + } + + while (dmax > 0) + { + if (src == overlap_bumper) + { + return RCNEGATE(ESOVRLP); + } + + /* + * Copying truncated + */ + if (slen == 0) + { + *dest = '\0'; + return RCNEGATE(EOK); + } + + *dest = *src; + if (*dest == '\0') + { + return RCNEGATE(EOK); + } + + dmax--; + slen--; + dest++; + src++; + } + } + + /* + * the entire src was not copied, so the string will be nulled. + */ + return RCNEGATE(ESNOSPC); +} + +errno_t strcpy_s(char* dest, r_size_t dmax, const char* src) +{ + const char* overlap_bumper; + + if (dest == NULL) + { + return RCNEGATE(ESNULLP); + } + + if (dmax == 0) + { + return RCNEGATE(ESZEROL); + } + + if (dmax > RSIZE_MAX_STR) + { + return RCNEGATE(ESLEMAX); + } + + if (src == NULL) + { + *dest = '\0'; + return RCNEGATE(ESNULLP); + } + + if (dest == src) + { + return RCNEGATE(EOK); + } + + if (dest < src) + { + overlap_bumper = src; + + while (dmax > 0) + { + if (dest == overlap_bumper) + { + return RCNEGATE(ESOVRLP); + } + + *dest = *src; + if (*dest == '\0') + { + return RCNEGATE(EOK); + } + + dmax--; + dest++; + src++; + } + } + else + { + overlap_bumper = dest; + + while (dmax > 0) + { + if (src == overlap_bumper) + { + return RCNEGATE(ESOVRLP); + } + + *dest = *src; + if (*dest == '\0') + { + return RCNEGATE(EOK); + } + + dmax--; + dest++; + src++; + } + } + + /* + * the entire src must have been copied, if not reset dest + * to null the string. 
+ */ + return RCNEGATE(ESNOSPC); +} + +/* very simple integer to hex */ +static const char* hex_encoding_table = "0123456789ABCDEF"; + +static void itoahex_s(char* dest, r_size_t dmax, rmtS32 value) +{ + r_size_t len; + rmtS32 halfbytepos; + + halfbytepos = 8; + + /* strip leading 0's */ + while (halfbytepos > 1) + { + --halfbytepos; + if (value >> (4 * halfbytepos) & 0xF) + { + ++halfbytepos; + break; + } + } + + len = 0; + while (len + 1 < dmax && halfbytepos > 0) + { + --halfbytepos; + dest[len] = hex_encoding_table[value >> (4 * halfbytepos) & 0xF]; + ++len; + } + + if (len < dmax) + { + dest[len] = 0; + } +} + +static const char* itoa_s(rmtS32 value) +{ + static char temp_dest[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int pos = 10; + + // Work back with the absolute value + rmtS32 abs_value = abs(value); + while (abs_value > 0) + { + temp_dest[pos--] = '0' + (abs_value % 10); + abs_value /= 10; + } + + // Place the negative + if (value < 0) + { + temp_dest[pos--] = '-'; + } + + return temp_dest + pos + 1; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @OSTHREADS: Wrappers around OS-specific thread functions +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#ifdef RMT_PLATFORM_WINDOWS +typedef DWORD rmtThreadId; +typedef HANDLE rmtThreadHandle; +#else +typedef uintptr_t rmtThreadId; +typedef pthread_t rmtThreadHandle; +#endif + +#ifdef RMT_PLATFORM_WINDOWS +typedef CONTEXT rmtCpuContext; +#else +typedef int rmtCpuContext; +#endif + +static rmtU32 rmtGetNbProcessors() +{ +#ifdef RMT_PLATFORM_WINDOWS + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + return system_info.dwNumberOfProcessors; +#else + // TODO: get_nprocs_conf / get_nprocs + return 0; +#endif +} + +static rmtThreadId rmtGetCurrentThreadId() +{ +#ifdef RMT_PLATFORM_WINDOWS + return GetCurrentThreadId(); +#else + return (rmtThreadId)pthread_self(); +#endif +} + +static rmtBool rmtSuspendThread(rmtThreadHandle thread_handle) +{ +#ifdef RMT_PLATFORM_WINDOWS + // SuspendThread is an async call to the scheduler and upon return the thread is not guaranteed to be suspended. + // Calling GetThreadContext will serialise that. + // See: https://github.com/mono/mono/blob/master/mono/utils/mono-threads-windows.c#L203 + return SuspendThread(thread_handle) == 0 ? RMT_TRUE : RMT_FALSE; +#else + return RMT_FALSE; +#endif +} + +static void rmtResumeThread(rmtThreadHandle thread_handle) +{ +#ifdef RMT_PLATFORM_WINDOWS + ResumeThread(thread_handle); +#endif +} + +#ifdef RMT_PLATFORM_WINDOWS +#ifndef CONTEXT_EXCEPTION_REQUEST +// These seem to be guarded by a _AMD64_ macro in winnt.h, which doesn't seem to be defined in older MSVC compilers. +// Which makes sense given this was a post-Vista/Windows 7 patch around errors in the WoW64 context switch. +// This bug was never fixed in the OS so defining these will only get this code to compile on Old Windows systems, with no +// guarantee of being stable at runtime. 
+#define CONTEXT_EXCEPTION_ACTIVE 0x8000000L +#define CONTEXT_SERVICE_ACTIVE 0x10000000L +#define CONTEXT_EXCEPTION_REQUEST 0x40000000L +#define CONTEXT_EXCEPTION_REPORTING 0x80000000L +#endif +#endif + +static rmtBool rmtGetUserModeThreadContext(rmtThreadHandle thread, rmtCpuContext* context) +{ +#ifdef RMT_PLATFORM_WINDOWS + DWORD kernel_mode_mask; + + // Request thread context with exception reporting + context->ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; + if (GetThreadContext(thread, context) == 0) + { + return RMT_FALSE; + } + + // Context on WoW64 is only valid and can only be set if the thread isn't in kernel mode + // Typical reference to this appears to be: http://zachsaw.blogspot.com/2010/11/wow64-bug-getthreadcontext-may-return.html + // Confirmed by MS here: https://social.msdn.microsoft.com/Forums/vstudio/en-US/aa176c36-6624-4776-9380-1c9cf37a314e/getthreadcontext-returns-stale-register-values-on-wow64?forum=windowscompatibility + kernel_mode_mask = CONTEXT_EXCEPTION_REPORTING | CONTEXT_EXCEPTION_ACTIVE | CONTEXT_SERVICE_ACTIVE; + return (context->ContextFlags & kernel_mode_mask) == CONTEXT_EXCEPTION_REPORTING ? RMT_TRUE : RMT_FALSE; +#else + return RMT_FALSE; +#endif +} + +static void rmtSetThreadContext(rmtThreadHandle thread_handle, rmtCpuContext* context) +{ +#ifdef RMT_PLATFORM_WINDOWS + SetThreadContext(thread_handle, context); +#endif +} + +static rmtError rmtOpenThreadHandle(rmtThreadId thread_id, rmtThreadHandle* out_thread_handle) +{ +#ifdef RMT_PLATFORM_WINDOWS + // Open the thread with required access rights to get the thread handle + *out_thread_handle = OpenThread(THREAD_QUERY_INFORMATION | THREAD_SUSPEND_RESUME | THREAD_SET_CONTEXT | THREAD_GET_CONTEXT, FALSE, thread_id); + if (*out_thread_handle == NULL) + { + return RMT_ERROR_OPEN_THREAD_HANDLE_FAIL; + } +#endif + + return RMT_ERROR_NONE; +} + +static void rmtCloseThreadHandle(rmtThreadHandle thread_handle) +{ +#ifdef RMT_PLATFORM_WINDOWS + if (thread_handle != NULL) + { + CloseHandle(thread_handle); + } +#endif +} + +#ifdef RMT_ENABLE_THREAD_SAMPLER +DWORD_PTR GetThreadStartAddress(rmtThreadHandle thread_handle) +{ + // Get NtQueryInformationThread from ntdll + HMODULE ntdll = GetModuleHandleA("ntdll.dll"); + if (ntdll != NULL) + { + typedef NTSTATUS (WINAPI *NTQUERYINFOMATIONTHREAD)(HANDLE, LONG, PVOID, ULONG, PULONG); + NTQUERYINFOMATIONTHREAD NtQueryInformationThread = (NTQUERYINFOMATIONTHREAD)GetProcAddress(ntdll, "NtQueryInformationThread"); + + // Use it to query the start address + DWORD_PTR start_address; + NTSTATUS status = NtQueryInformationThread(thread_handle, 9, &start_address, sizeof(DWORD), NULL); + if (status == 0) + { + return start_address; + } + } + + return 0; +} + +const char* GetStartAddressModuleName(DWORD_PTR start_address) +{ + BOOL success; + MODULEENTRY32 module_entry; + + // Snapshot the modules + HANDLE handle = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, 0); + if (handle == INVALID_HANDLE_VALUE) + { + return NULL; + } + + module_entry.dwSize = sizeof(MODULEENTRY32); + module_entry.th32ModuleID = 1; + + // Enumerate modules checking start address against their loaded address range + success = Module32First(handle, &module_entry); + while (success == TRUE) + { + if (start_address >= (DWORD_PTR)module_entry.modBaseAddr && start_address <= ((DWORD_PTR)module_entry.modBaseAddr + module_entry.modBaseSize)) + { + static char name[MAX_MODULE_NAME32 + 1]; +#ifdef UNICODE + int size = WideCharToMultiByte(CP_ACP, 0, module_entry.szModule, -1, name, 
MAX_MODULE_NAME32, NULL, NULL); + if (size < 1) + { + name[0] = '\0'; + } +#else + strcpy_s(name, sizeof(name), module_entry.szModule); +#endif + CloseHandle(handle); + return name; + } + + success = Module32Next(handle, &module_entry); + } + + CloseHandle(handle); + + return NULL; +} +#endif + +static void rmtGetThreadNameFallback(char* out_thread_name, rmtU32 thread_name_size); + +static void rmtGetThreadName(rmtThreadId thread_id, rmtThreadHandle thread_handle, char* out_thread_name, rmtU32 thread_name_size) +{ +#ifdef RMT_PLATFORM_WINDOWS + DWORD_PTR address; + const char* module_name; + rmtU32 len; + + // Use the new Windows 10 GetThreadDescription function + HMODULE kernel32 = GetModuleHandleA("Kernel32.dll"); + if (kernel32 != NULL) + { + typedef HRESULT(WINAPI* GETTHREADDESCRIPTION)(HANDLE hThread, PWSTR *ppszThreadDescription); + GETTHREADDESCRIPTION GetThreadDescription = (GETTHREADDESCRIPTION)GetProcAddress(kernel32, "GetThreadDescription"); + if (GetThreadDescription != NULL) + { + int size; + + WCHAR* thread_name_w; + GetThreadDescription(thread_handle, &thread_name_w); + + // Returned size is the byte size, so will be 1 for an empty null-terminated string + size = WideCharToMultiByte(CP_ACP, 0, thread_name_w, -1, out_thread_name, thread_name_size, NULL, NULL); + if (size > 1) + { + return; + } + } + } + + #ifndef _XBOX_ONE + // At this point GetThreadDescription hasn't returned anything so let's get the thread module name and use that + address = GetThreadStartAddress(thread_handle); + if (address == 0) + { + rmtGetThreadNameFallback(out_thread_name, thread_name_size); + return; + } + module_name = GetStartAddressModuleName(address); + if (module_name == NULL) + { + rmtGetThreadNameFallback(out_thread_name, thread_name_size); + return; + } + #else + rmtGetThreadNameFallback(out_thread_name, thread_name_size); + return; + #endif + + // Concatenate the thread name with the thread ID as that will be unique, whereas the start address won't be + memset(out_thread_name, 0, thread_name_size); + strcpy_s(out_thread_name, thread_name_size, module_name); + strncat_s(out_thread_name, thread_name_size, "!", 1); + len = strnlen_s(out_thread_name, thread_name_size); + itoahex_s(out_thread_name + len, thread_name_size - len, thread_id); + +#elif defined(RMT_PLATFORM_MACOS) + + int ret = pthread_getname_np(pthread_self(), out_thread_name, thread_name_size); + if (ret != 0 || out_thread_name[0] == '\0') + { + rmtGetThreadNameFallback(out_thread_name, thread_name_size); + } + +#elif defined(RMT_PLATFORM_LINUX) && RMT_USE_POSIX_THREADNAMES && !defined(__FreeBSD__) && !defined(__OpenBSD__) + + prctl(PR_GET_NAME, out_thread_name, 0, 0, 0); + +#else + + rmtGetThreadNameFallback(out_thread_name, thread_name_size); + +#endif +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @THREADS: Cross-platform thread object +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef struct Thread_t rmtThread; +typedef rmtError (*ThreadProc)(rmtThread* thread); + +struct Thread_t +{ + rmtThreadHandle handle; + + // Callback executed when the thread is created + ThreadProc callback; + + // Caller-specified parameter passed to
Thread_Create + void* param; + + // Error state returned from callback + rmtError error; + + // External threads can set this to request an exit + rmtAtomicBool request_exit; +}; + +#if defined(RMT_PLATFORM_WINDOWS) + +static DWORD WINAPI ThreadProcWindows(LPVOID lpParameter) +{ + rmtThread* thread = (rmtThread*)lpParameter; + assert(thread != NULL); + thread->error = thread->callback(thread); + return thread->error == RMT_ERROR_NONE ? 0 : 1; +} + +#else +static void* StartFunc(void* pArgs) +{ + rmtThread* thread = (rmtThread*)pArgs; + assert(thread != NULL); + thread->error = thread->callback(thread); + return NULL; // returned error not used; check thread->error instead. +} +#endif + +static int rmtThread_Valid(rmtThread* thread) +{ + assert(thread != NULL); + +#if defined(RMT_PLATFORM_WINDOWS) + return thread->handle != NULL; +#else + return !pthread_equal(thread->handle, pthread_self()); +#endif +} + +static rmtError rmtThread_Constructor(rmtThread* thread, ThreadProc callback, void* param) +{ + assert(thread != NULL); + + thread->callback = callback; + thread->param = param; + thread->error = RMT_ERROR_NONE; + thread->request_exit = RMT_FALSE; + + // OS-specific thread creation + +#if defined(RMT_PLATFORM_WINDOWS) + + thread->handle = CreateThread(NULL, // lpThreadAttributes + 0, // dwStackSize + ThreadProcWindows, // lpStartAddress + thread, // lpParameter + 0, // dwCreationFlags + NULL); // lpThreadId + + if (thread->handle == NULL) + return RMT_ERROR_CREATE_THREAD_FAIL; + +#else + + int32_t error = pthread_create(&thread->handle, NULL, StartFunc, thread); + if (error) + { + // Contents of 'thread' parameter to pthread_create() are undefined after a + // failed call, so we can't pre-set it to an invalid value beforehand. + thread->handle = pthread_self(); + return RMT_ERROR_CREATE_THREAD_FAIL; + } + +#endif + + return RMT_ERROR_NONE; +} + +static void rmtThread_RequestExit(rmtThread* thread) +{ + // Not really worried about memory barriers or delayed visibility to the target thread + assert(thread != NULL); + thread->request_exit = RMT_TRUE; +} + +static void rmtThread_Join(rmtThread* thread) +{ + assert(rmtThread_Valid(thread)); + +#if defined(RMT_PLATFORM_WINDOWS) + WaitForSingleObject(thread->handle, INFINITE); +#else + pthread_join(thread->handle, NULL); +#endif +} + +static void rmtThread_Destructor(rmtThread* thread) +{ + assert(thread != NULL); + + if (rmtThread_Valid(thread)) + { + // Shutdown the thread + rmtThread_RequestExit(thread); + rmtThread_Join(thread); + + // OS-specific release of thread resources + +#if defined(RMT_PLATFORM_WINDOWS) + CloseHandle(thread->handle); + thread->handle = NULL; +#endif + } +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @OBJALLOC: Reusable Object Allocator +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// +// All objects that require free-list-backed allocation need to inherit from this type.
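+// "Inherit" here means placing ObjectLink as the first member of the struct, so a
+// pointer to any pooled object can be safely cast to ObjectLink* by the allocator
+// below. A minimal sketch of such a type (illustrative only; not a type defined in
+// this file):
+//
+//     typedef struct
+//     {
+//         ObjectLink Link;    // must be the first member
+//         rmtU32 payload;     // object-specific data follows
+//     } PooledObject;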
+// +typedef struct ObjectLink_s +{ + struct ObjectLink_s* volatile next; +} ObjectLink; + +typedef rmtAtomicPtr(ObjectLink) rmtAtomicObjectLinkPtr; + +static void ObjectLink_Constructor(ObjectLink* link) +{ + assert(link != NULL); + link->next = NULL; +} + +typedef rmtError (*ObjConstructor)(void*); +typedef void (*ObjDestructor)(void*); + +typedef struct +{ + // Object create/destroy parameters + rmtU32 object_size; + ObjConstructor constructor; + ObjDestructor destructor; + + // Number of objects in the free list + rmtAtomicS32 nb_free; + + // Number of objects used by callers + rmtAtomicS32 nb_inuse; + + // Total allocation count + rmtAtomicS32 nb_allocated; + + rmtAtomicObjectLinkPtr first_free; +} ObjectAllocator; + +static rmtError ObjectAllocator_Constructor(ObjectAllocator* allocator, rmtU32 object_size, ObjConstructor constructor, + ObjDestructor destructor) +{ + allocator->object_size = object_size; + allocator->constructor = constructor; + allocator->destructor = destructor; + allocator->nb_free = 0; + allocator->nb_inuse = 0; + allocator->nb_allocated = 0; + allocator->first_free = (ObjectLink*)0; + return RMT_ERROR_NONE; +} + +static void ObjectAllocator_Destructor(ObjectAllocator* allocator) +{ + // Ensure everything has been released to the allocator + assert(allocator != NULL); + assert(allocator->nb_inuse == 0); + + // Destroy all objects released to the allocator + while (allocator->first_free != NULL) + { + ObjectLink* next = ((ObjectLink*)allocator->first_free)->next; + assert(allocator->destructor != NULL); + allocator->destructor((void*)allocator->first_free); + rmtFree((void*)allocator->first_free); + allocator->first_free = next; + } +} + +static void ObjectAllocator_Push(ObjectAllocator* allocator, ObjectLink* start, ObjectLink* end) +{ + assert(allocator != NULL); + assert(start != NULL); + assert(end != NULL); + + // CAS push: add the range to the front of the list + for (;;) + { + ObjectLink* old_link = (ObjectLink*)allocator->first_free; + end->next = old_link; + if (AtomicCompareAndSwapPointer((rmtAtomicVoidPtr*)&allocator->first_free, (void*)old_link, (void*)start) == + RMT_TRUE) + break; + } +} + +static ObjectLink* ObjectAllocator_Pop(ObjectAllocator* allocator) +{ + ObjectLink* link; + + assert(allocator != NULL); + + // CAS pop from the front of the list + for (;;) + { + ObjectLink* old_link = (ObjectLink*)allocator->first_free; + if (old_link == NULL) + { + return NULL; + } + ObjectLink* next_link = old_link->next; + if (AtomicCompareAndSwapPointer((rmtAtomicVoidPtr*)&allocator->first_free, (void*)old_link, (void*)next_link) == + RMT_TRUE) + { + link = (ObjectLink*)old_link; + break; + } + } + + link->next = NULL; + + return link; +} + +static rmtError ObjectAllocator_Alloc(ObjectAllocator* allocator, void** object) +{ + // This function only calls the object constructor on initial malloc of an object + + assert(allocator != NULL); + assert(object != NULL); + + // Pull available objects from the free list + *object = ObjectAllocator_Pop(allocator); + + // Has the free list run out?
+ if (*object == NULL) + { + rmtError error; + + // Allocate/construct a new object + *object = rmtMalloc(allocator->object_size); + if (*object == NULL) + return RMT_ERROR_MALLOC_FAIL; + assert(allocator->constructor != NULL); + error = allocator->constructor(*object); + if (error != RMT_ERROR_NONE) + { + // Auto-teardown on failure + assert(allocator->destructor != NULL); + allocator->destructor(*object); + rmtFree(*object); + return error; + } + + AtomicAddS32(&allocator->nb_allocated, 1); + } + else + { + AtomicSubS32(&allocator->nb_free, 1); + } + + AtomicAddS32(&allocator->nb_inuse, 1); + + return RMT_ERROR_NONE; +} + +static void ObjectAllocator_Free(ObjectAllocator* allocator, void* object) +{ + // Add back to the free-list + assert(allocator != NULL); + ObjectAllocator_Push(allocator, (ObjectLink*)object, (ObjectLink*)object); + AtomicSubS32(&allocator->nb_inuse, 1); + AtomicAddS32(&allocator->nb_free, 1); +} + +static void ObjectAllocator_FreeRange(ObjectAllocator* allocator, void* start, void* end, rmtU32 count) +{ + assert(allocator != NULL); + ObjectAllocator_Push(allocator, (ObjectLink*)start, (ObjectLink*)end); + AtomicSubS32(&allocator->nb_inuse, count); + AtomicAddS32(&allocator->nb_free, count); +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @DYNBUF: Dynamic Buffer +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef struct +{ + rmtU32 alloc_granularity; + + rmtU32 bytes_allocated; + rmtU32 bytes_used; + + rmtU8* data; +} Buffer; + +static rmtError Buffer_Constructor(Buffer* buffer, rmtU32 alloc_granularity) +{ + assert(buffer != NULL); + buffer->alloc_granularity = alloc_granularity; + buffer->bytes_allocated = 0; + buffer->bytes_used = 0; + buffer->data = NULL; + return RMT_ERROR_NONE; +} + +static void Buffer_Destructor(Buffer* buffer) +{ + assert(buffer != NULL); + + if (buffer->data != NULL) + { + rmtFree(buffer->data); + buffer->data = NULL; + } +} + +static rmtError Buffer_Grow(Buffer* buffer, rmtU32 length) +{ + // Calculate size increase rounded up to the requested allocation granularity + rmtU32 granularity = buffer->alloc_granularity; + rmtU32 allocate = buffer->bytes_allocated + length; + allocate = allocate + ((granularity - 1) - ((allocate - 1) % granularity)); + + buffer->bytes_allocated = allocate; + buffer->data = (rmtU8*)rmtRealloc(buffer->data, buffer->bytes_allocated); + if (buffer->data == NULL) + return RMT_ERROR_MALLOC_FAIL; + + return RMT_ERROR_NONE; +} + +static rmtError Buffer_Pad(Buffer* buffer, rmtU32 length) +{ + assert(buffer != NULL); + + // Reallocate the buffer on overflow + if (buffer->bytes_used + length > buffer->bytes_allocated) + { + rmtTry(Buffer_Grow(buffer, length)); + } + + // Step by the pad amount + buffer->bytes_used += length; + + return RMT_ERROR_NONE; +} + +static rmtError Buffer_AlignedPad(Buffer* buffer, rmtU32 start_pos) +{ + return Buffer_Pad(buffer, (4 - ((buffer->bytes_used - start_pos) & 3)) & 3); +} + +static rmtError Buffer_Write(Buffer* buffer, const void* data, rmtU32 length) +{ + assert(buffer != NULL); + + // Reallocate the buffer on overflow + if (buffer->bytes_used + length > buffer->bytes_allocated) + { 
+ rmtTry(Buffer_Grow(buffer, length)); + } + + // Copy all bytes + memcpy(buffer->data + buffer->bytes_used, data, length); + buffer->bytes_used += length; + + return RMT_ERROR_NONE; +} + +static rmtError Buffer_WriteStringZ(Buffer* buffer, rmtPStr string) +{ + assert(string != NULL); + return Buffer_Write(buffer, (void*)string, (rmtU32)strnlen_s(string, 2048) + 1); +} + +static void U32ToByteArray(rmtU8* dest, rmtU32 value) +{ + // Commit as little-endian + dest[0] = value & 255; + dest[1] = (value >> 8) & 255; + dest[2] = (value >> 16) & 255; + dest[3] = value >> 24; +} + +static rmtError Buffer_WriteBool(Buffer* buffer, rmtBool value) +{ + return Buffer_Write(buffer, &value, 1); +} + +static rmtError Buffer_WriteU32(Buffer* buffer, rmtU32 value) +{ + assert(buffer != NULL); + + // Reallocate the buffer on overflow + if (buffer->bytes_used + sizeof(value) > buffer->bytes_allocated) + { + rmtTry(Buffer_Grow(buffer, sizeof(value))); + } + +// Copy all bytes +#if RMT_ASSUME_LITTLE_ENDIAN + *(rmtU32*)(buffer->data + buffer->bytes_used) = value; +#else + U32ToByteArray(buffer->data + buffer->bytes_used, value); +#endif + + buffer->bytes_used += sizeof(value); + + return RMT_ERROR_NONE; +} + +static rmtBool IsLittleEndian() +{ + // Not storing this in a global variable allows the compiler to more easily optimise + // this away altogether. + union { + unsigned int i; + unsigned char c[sizeof(unsigned int)]; + } u; + u.i = 1; + return u.c[0] == 1 ? RMT_TRUE : RMT_FALSE; +} + +static rmtError Buffer_WriteF64(Buffer* buffer, rmtF64 value) +{ + assert(buffer != NULL); + + // Reallocate the buffer on overflow + if (buffer->bytes_used + sizeof(value) > buffer->bytes_allocated) + { + rmtTry(Buffer_Grow(buffer, sizeof(value))); + } + +// Copy all bytes +#if RMT_ASSUME_LITTLE_ENDIAN + *(rmtF64*)(buffer->data + buffer->bytes_used) = value; +#else + { + union { + double d; + unsigned char c[sizeof(double)]; + } u; + rmtU8* dest = buffer->data + buffer->bytes_used; + u.d = value; + if (IsLittleEndian()) + { + dest[0] = u.c[0]; + dest[1] = u.c[1]; + dest[2] = u.c[2]; + dest[3] = u.c[3]; + dest[4] = u.c[4]; + dest[5] = u.c[5]; + dest[6] = u.c[6]; + dest[7] = u.c[7]; + } + else + { + dest[0] = u.c[7]; + dest[1] = u.c[6]; + dest[2] = u.c[5]; + dest[3] = u.c[4]; + dest[4] = u.c[3]; + dest[5] = u.c[2]; + dest[6] = u.c[1]; + dest[7] = u.c[0]; + } + } +#endif + + buffer->bytes_used += sizeof(value); + + return RMT_ERROR_NONE; +} + +static rmtError Buffer_WriteU64(Buffer* buffer, rmtU64 value) +{ + // Write as a double as Javascript DataView doesn't have a 64-bit integer read + return Buffer_WriteF64(buffer, (double)value); +} + +static rmtError Buffer_WriteStringWithLength(Buffer* buffer, rmtPStr string) +{ + rmtU32 length = (rmtU32)strnlen_s(string, 2048); + rmtTry(Buffer_WriteU32(buffer, length)); + return Buffer_Write(buffer, (void*)string, length); +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @HASHTABLE: Integer pair hash map for inserts/finds. No removes for added simplicity. 
+------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#define RMT_NOT_FOUND 0xffffffffffffffff + +typedef struct +{ + // Non-zero, pre-hashed key + rmtU32 key; + + // Value that's not equal to RMT_NOT_FOUND + rmtU64 value; +} HashSlot; + +typedef struct +{ + // Stats + rmtU32 maxNbSlots; + rmtU32 nbSlots; + + // Data + HashSlot* slots; +} rmtHashTable; + +static rmtError rmtHashTable_Constructor(rmtHashTable* table, rmtU32 max_nb_slots) +{ + // Default initialise + assert(table != NULL); + table->maxNbSlots = max_nb_slots; + table->nbSlots = 0; + + // Allocate and clear the hash slots + rmtTryMallocArray(HashSlot, table->slots, table->maxNbSlots); + memset(table->slots, 0, table->maxNbSlots * sizeof(HashSlot)); + + return RMT_ERROR_NONE; +} + +static void rmtHashTable_Destructor(rmtHashTable* table) +{ + assert(table != NULL); + + if (table->slots != NULL) + { + rmtFree(table->slots); + table->slots = NULL; + } +} + +static rmtError rmtHashTable_Resize(rmtHashTable* table); + +static rmtError rmtHashTable_Insert(rmtHashTable* table, rmtU32 key, rmtU64 value) +{ + HashSlot* slot = NULL; + rmtError error = RMT_ERROR_NONE; + + // Calculate initial slot location for this key + rmtU32 index_mask = table->maxNbSlots - 1; + rmtU32 index = key & index_mask; + + assert(key != 0); + assert(value != RMT_NOT_FOUND); + + // Linear probe for free slot, reusing any existing key matches + // There will always be at least one free slot due to load factor management + while (table->slots[index].key) + { + if (table->slots[index].key == key) + { + // Cancels out the occupied-slot increment below + table->nbSlots--; + break; + } + + index = (index + 1) & index_mask; + } + + // Just verify that I've got no errors in the code above + assert(index < table->maxNbSlots); + + // Add to the table + slot = table->slots + index; + slot->key = key; + slot->value = value; + table->nbSlots++; + + // Resize when load factor is greater than 2/3 + if (table->nbSlots > (table->maxNbSlots * 2) / 3) + { + error = rmtHashTable_Resize(table); + } + + return error; +} + +static rmtError rmtHashTable_Resize(rmtHashTable* table) +{ + rmtU32 old_max_nb_slots = table->maxNbSlots; + HashSlot* new_slots = NULL; + HashSlot* old_slots = table->slots; + rmtU32 i; + + // Increase the table size + rmtU32 new_max_nb_slots = table->maxNbSlots; + if (new_max_nb_slots < 8192 * 4) + { + new_max_nb_slots *= 4; + } + else + { + new_max_nb_slots *= 2; + } + + // Allocate and clear a new table + rmtTryMallocArray(HashSlot, new_slots, new_max_nb_slots); + memset(new_slots, 0, new_max_nb_slots * sizeof(HashSlot)); + + // Update fields of the table after successful allocation only + table->slots = new_slots; + table->maxNbSlots = new_max_nb_slots; + table->nbSlots = 0; + + // Reinsert all objects into the new table + for (i = 0; i < old_max_nb_slots; i++) + { + HashSlot* slot = old_slots + i; + if (slot->key != 0) + { + rmtHashTable_Insert(table, slot->key, slot->value); + } + } + + rmtFree(old_slots); + + return RMT_ERROR_NONE; +} + +static rmtU64 rmtHashTable_Find(rmtHashTable* table, rmtU32 key) +{ + // Calculate initial slot location for this key + rmtU32 index_mask = table->maxNbSlots - 1; + rmtU32 index = key & index_mask; + + // Linear probe for matching hash + while (table->slots[index].key) + { + HashSlot* slot = table->slots + index; + + if (slot->key ==
key) + { + return slot->value; + } + + index = (index + 1) & index_mask; + } + + return RMT_NOT_FOUND; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @STRINGTABLE: Map from string hash to string offset in local buffer +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef struct +{ + // Growable dynamic array of strings added so far + Buffer* text; + + // Map from text hash to text location in the buffer + rmtHashTable* text_map; +} StringTable; + +static rmtError StringTable_Constructor(StringTable* table) +{ + // Default initialise + assert(table != NULL); + table->text = NULL; + table->text_map = NULL; + + // Allocate reasonable storage for initial sample names + rmtTryNew(Buffer, table->text, 8 * 1024); + rmtTryNew(rmtHashTable, table->text_map, 1 * 1024); + + return RMT_ERROR_NONE; +} + +static void StringTable_Destructor(StringTable* table) +{ + assert(table != NULL); + + rmtDelete(rmtHashTable, table->text_map); + rmtDelete(Buffer, table->text); +} + +static rmtPStr StringTable_Find(StringTable* table, rmtU32 name_hash) +{ + rmtU64 text_offset = rmtHashTable_Find(table->text_map, name_hash); + if (text_offset != RMT_NOT_FOUND) + { + return (rmtPStr)(table->text->data + text_offset); + } + return NULL; +} + +static rmtBool StringTable_Insert(StringTable* table, rmtU32 name_hash, rmtPStr name) +{ + // Only add to the buffer if the string isn't already there + rmtU64 text_offset = rmtHashTable_Find(table->text_map, name_hash); + if (text_offset == RMT_NOT_FOUND) + { + // TODO: Allocation errors aren't being passed on to the caller + text_offset = table->text->bytes_used; + Buffer_WriteStringZ(table->text, name); + rmtHashTable_Insert(table->text_map, name_hash, text_offset); + return RMT_TRUE; + } + + return RMT_FALSE; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @SOCKETS: Sockets TCP/IP Wrapper +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#ifndef RMT_PLATFORM_WINDOWS +typedef int SOCKET; +#define INVALID_SOCKET -1 +#define SOCKET_ERROR -1 +#define SD_SEND SHUT_WR +#define closesocket close +#endif + +typedef struct +{ + SOCKET socket; +} TCPSocket; + +typedef struct +{ + rmtBool can_read; + rmtBool can_write; + rmtError error_state; +} SocketStatus; + +// +// Function prototypes +// +static void TCPSocket_Close(TCPSocket* tcp_socket); + +static rmtError InitialiseNetwork() +{ +#ifdef RMT_PLATFORM_WINDOWS + + WSADATA wsa_data; + if (WSAStartup(MAKEWORD(2, 2), &wsa_data)) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "WSAStartup failed"); + } + if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "WSAStartup returned incorrect version number"); + } + + return RMT_ERROR_NONE; + +#else + +
return RMT_ERROR_NONE; + +#endif +} + +static void ShutdownNetwork() +{ +#ifdef RMT_PLATFORM_WINDOWS + WSACleanup(); +#endif +} + +static rmtError TCPSocket_Constructor(TCPSocket* tcp_socket) +{ + assert(tcp_socket != NULL); + tcp_socket->socket = INVALID_SOCKET; + return InitialiseNetwork(); +} + +static void TCPSocket_Destructor(TCPSocket* tcp_socket) +{ + assert(tcp_socket != NULL); + TCPSocket_Close(tcp_socket); + ShutdownNetwork(); +} + +static rmtError TCPSocket_RunServer(TCPSocket* tcp_socket, rmtU16 port, rmtBool reuse_open_port, + rmtBool limit_connections_to_localhost) +{ + SOCKET s = INVALID_SOCKET; + struct sockaddr_in sin; +#ifdef RMT_PLATFORM_WINDOWS + u_long nonblock = 1; +#endif + + memset(&sin, 0, sizeof(sin)); + assert(tcp_socket != NULL); + + // Try to create the socket + s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Can't create a socket for connection to the remote viewer"); + } + + if (reuse_open_port) + { + int enable = 1; + +// set SO_REUSEADDR so binding doesn't fail when restarting the application +// (otherwise the same port can't be reused within TIME_WAIT) +// I'm not checking for errors because if this fails (unlikely) we might still +// be able to bind to the socket anyway +#ifdef RMT_PLATFORM_POSIX + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)); +#elif defined(RMT_PLATFORM_WINDOWS) + // windows also needs SO_EXCLUSIVEADDRUSE, + // see http://www.andy-pearce.com/blog/posts/2013/Feb/so_reuseaddr-on-windows/ + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char*)&enable, sizeof(enable)); + enable = 1; + setsockopt(s, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, (char*)&enable, sizeof(enable)); +#endif + } + + // Bind the socket to the incoming port + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(limit_connections_to_localhost ?
INADDR_LOOPBACK : INADDR_ANY); + sin.sin_port = htons(port); + if (bind(s, (struct sockaddr*)&sin, sizeof(sin)) == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Can't bind a socket for the server"); + } + + // Connection is valid, remaining code is socket state modification + tcp_socket->socket = s; + + // Enter a listening state with a backlog of 1 connection + if (listen(s, 1) == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Created server socket failed to enter a listen state"); + } + +// Set as non-blocking +#ifdef RMT_PLATFORM_WINDOWS + if (ioctlsocket(tcp_socket->socket, FIONBIO, &nonblock) == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Created server socket failed to switch to a non-blocking state"); + } +#else + if (fcntl(tcp_socket->socket, F_SETFL, O_NONBLOCK) == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Created server socket failed to switch to a non-blocking state"); + } +#endif + + return RMT_ERROR_NONE; +} + +static void TCPSocket_Close(TCPSocket* tcp_socket) +{ + assert(tcp_socket != NULL); + + if (tcp_socket->socket != INVALID_SOCKET) + { + // Shutdown the connection, stopping all sends + int result = shutdown(tcp_socket->socket, SD_SEND); + if (result != SOCKET_ERROR) + { + // Keep receiving until the peer closes the connection + int total = 0; + char temp_buf[128]; + while (result > 0) + { + result = (int)recv(tcp_socket->socket, temp_buf, sizeof(temp_buf), 0); + total += result; + } + } + + // Close the socket and issue a network shutdown request + closesocket(tcp_socket->socket); + tcp_socket->socket = INVALID_SOCKET; + } +} + +static SocketStatus TCPSocket_PollStatus(TCPSocket* tcp_socket) +{ + SocketStatus status; + fd_set fd_read, fd_write, fd_errors; + struct timeval tv; + + status.can_read = RMT_FALSE; + status.can_write = RMT_FALSE; + status.error_state = RMT_ERROR_NONE; + + assert(tcp_socket != NULL); + if (tcp_socket->socket == INVALID_SOCKET) + { + status.error_state = RMT_ERROR_SOCKET_INVALID_POLL; + return status; + } + + // Set read/write/error markers for the socket + FD_ZERO(&fd_read); + FD_ZERO(&fd_write); + FD_ZERO(&fd_errors); +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // warning C4127: conditional expression is constant +#endif // _MSC_VER + FD_SET(tcp_socket->socket, &fd_read); + FD_SET(tcp_socket->socket, &fd_write); + FD_SET(tcp_socket->socket, &fd_errors); +#ifdef _MSC_VER +#pragma warning(pop) +#endif // _MSC_VER + + // Poll socket status without blocking + tv.tv_sec = 0; + tv.tv_usec = 0; + if (select(((int)tcp_socket->socket) + 1, &fd_read, &fd_write, &fd_errors, &tv) == SOCKET_ERROR) + { + status.error_state = RMT_ERROR_SOCKET_SELECT_FAIL; + return status; + } + + status.can_read = FD_ISSET(tcp_socket->socket, &fd_read) != 0 ? RMT_TRUE : RMT_FALSE; + status.can_write = FD_ISSET(tcp_socket->socket, &fd_write) != 0 ? RMT_TRUE : RMT_FALSE; + status.error_state = FD_ISSET(tcp_socket->socket, &fd_errors) != 0 ? 
RMT_ERROR_SOCKET_POLL_ERRORS : RMT_ERROR_NONE; + return status; +} + +static rmtError TCPSocket_AcceptConnection(TCPSocket* tcp_socket, TCPSocket** client_socket) +{ + SocketStatus status; + SOCKET s; + + // Ensure there is an incoming connection + assert(tcp_socket != NULL); + status = TCPSocket_PollStatus(tcp_socket); + if (status.error_state != RMT_ERROR_NONE || !status.can_read) + return status.error_state; + + // Accept the connection + s = accept(tcp_socket->socket, 0, 0); + if (s == SOCKET_ERROR) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Server failed to accept connection from client"); + } + +#ifdef SO_NOSIGPIPE + // On POSIX systems, send() may send a SIGPIPE signal when writing to an + // already closed connection. By setting this option, we prevent the + // signal from being emitted and send will instead return an error and set + // errno to EPIPE. + // + // This is supported on BSD platforms and not on Linux. + { + int flag = 1; + setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &flag, sizeof(flag)); + } +#endif + // Create a client socket for the new connection + assert(client_socket != NULL); + rmtTryNew(TCPSocket, *client_socket); + (*client_socket)->socket = s; + + return RMT_ERROR_NONE; +} + +static int TCPTryAgain() +{ +#ifdef RMT_PLATFORM_WINDOWS + DWORD error = WSAGetLastError(); + return error == WSAEWOULDBLOCK; +#else +#if EAGAIN == EWOULDBLOCK + return errno == EAGAIN; +#else + return errno == EAGAIN || errno == EWOULDBLOCK; +#endif +#endif +} + +static rmtError TCPSocket_Send(TCPSocket* tcp_socket, const void* data, rmtU32 length, rmtU32 timeout_ms) +{ + SocketStatus status; + char* cur_data = NULL; + char* end_data = NULL; + rmtU32 start_ms = 0; + rmtU32 cur_ms = 0; + + assert(tcp_socket != NULL); + + start_ms = msTimer_Get(); + + // Loop until timeout checking whether data can be written + status.can_write = RMT_FALSE; + while (!status.can_write) + { + status = TCPSocket_PollStatus(tcp_socket); + if (status.error_state != RMT_ERROR_NONE) + return status.error_state; + + cur_ms = msTimer_Get(); + if (cur_ms - start_ms > timeout_ms) + { + return rmtMakeError(RMT_ERROR_TIMEOUT, "Timed out trying to send data"); + } + } + + cur_data = (char*)data; + end_data = cur_data + length; + + while (cur_data < end_data) + { + // Attempt to send the remaining chunk of data + int bytes_sent; + int send_flags = 0; +#ifdef MSG_NOSIGNAL + // On Linux this prevents send from emitting a SIGPIPE signal + // Equivalent on BSD to the SO_NOSIGPIPE option. + send_flags = MSG_NOSIGNAL; +#endif + bytes_sent = (int)send(tcp_socket->socket, cur_data, (int)(end_data - cur_data), send_flags); + + if (bytes_sent == SOCKET_ERROR || bytes_sent == 0) + { + // Close the connection if sending fails for any other reason other than blocking + if (bytes_sent != 0 && !TCPTryAgain()) + return RMT_ERROR_SOCKET_SEND_FAIL; + + // First check for tick-count overflow and reset, giving a slight hitch every 49.7 days + cur_ms = msTimer_Get(); + if (cur_ms < start_ms) + { + start_ms = cur_ms; + continue; + } + + // + // Timeout can happen when: + // + // 1) endpoint is no longer there + // 2) endpoint can't consume quick enough + // 3) local buffers overflow + // + // As none of these are actually errors, we have to pass this timeout back to the caller. + // + // TODO: This strategy breaks down if a send partially completes and then times out! 
+ // + if (cur_ms - start_ms > timeout_ms) + { + return rmtMakeError(RMT_ERROR_TIMEOUT, "Timed out trying to send data"); + } + } + else + { + // Jump over the data sent + cur_data += bytes_sent; + } + } + + return RMT_ERROR_NONE; +} + +static rmtError TCPSocket_Receive(TCPSocket* tcp_socket, void* data, rmtU32 length, rmtU32 timeout_ms) +{ + SocketStatus status; + char* cur_data = NULL; + char* end_data = NULL; + rmtU32 start_ms = 0; + rmtU32 cur_ms = 0; + + assert(tcp_socket != NULL); + + // Ensure there is data to receive + status = TCPSocket_PollStatus(tcp_socket); + if (status.error_state != RMT_ERROR_NONE) + return status.error_state; + if (!status.can_read) + return RMT_ERROR_SOCKET_RECV_NO_DATA; + + cur_data = (char*)data; + end_data = cur_data + length; + + // Loop until all data has been received + start_ms = msTimer_Get(); + while (cur_data < end_data) + { + int bytes_received = (int)recv(tcp_socket->socket, cur_data, (int)(end_data - cur_data), 0); + + if (bytes_received == SOCKET_ERROR || bytes_received == 0) + { + // Close the connection if receiving fails for any other reason other than blocking + if (bytes_received != 0 && !TCPTryAgain()) + return RMT_ERROR_SOCKET_RECV_FAILED; + + // First check for tick-count overflow and reset, giving a slight hitch every 49.7 days + cur_ms = msTimer_Get(); + if (cur_ms < start_ms) + { + start_ms = cur_ms; + continue; + } + + // + // Timeout can happen when: + // + // 1) data is delayed by sender + // 2) sender fails to send a complete set of packets + // + // As not all of these scenarios are errors, we need to pass this information back to the caller. + // + // TODO: This strategy breaks down if a receive partially completes and then times out! + // + if (cur_ms - start_ms > timeout_ms) + { + return RMT_ERROR_SOCKET_RECV_TIMEOUT; + } + } + else + { + // Jump over the data received + cur_data += bytes_received; + } + } + + return RMT_ERROR_NONE; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @SHA1: SHA-1 Cryptographic Hash Function +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// +// Typed to allow enforced data size specification +// +typedef struct +{ + rmtU8 data[20]; +} SHA1; + +/* + Copyright (c) 2011, Micael Hildenborg + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Micael Hildenborg nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY Micael Hildenborg ''AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL Micael Hildenborg BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + Contributors: + Gustav + Several members in the gamedev.se forum. + Gregory Petrosyan + */ + +// Rotate an integer value to left. +static unsigned int rol(const unsigned int value, const unsigned int steps) +{ + return ((value << steps) | (value >> (32 - steps))); +} + +// Sets the first 16 integers in the buffert to zero. +// Used for clearing the W buffert. +static void clearWBuffert(unsigned int* buffert) +{ + int pos; + for (pos = 16; --pos >= 0;) + { + buffert[pos] = 0; + } +} + +static void innerHash(unsigned int* result, unsigned int* w) +{ + unsigned int a = result[0]; + unsigned int b = result[1]; + unsigned int c = result[2]; + unsigned int d = result[3]; + unsigned int e = result[4]; + + int round = 0; + +#define sha1macro(func, val) \ + { \ + const unsigned int t = rol(a, 5) + (func) + e + val + w[round]; \ + e = d; \ + d = c; \ + c = rol(b, 30); \ + b = a; \ + a = t; \ + } + + while (round < 16) + { + sha1macro((b & c) | (~b & d), 0x5a827999); + ++round; + } + while (round < 20) + { + w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1); + sha1macro((b & c) | (~b & d), 0x5a827999); + ++round; + } + while (round < 40) + { + w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1); + sha1macro(b ^ c ^ d, 0x6ed9eba1); + ++round; + } + while (round < 60) + { + w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1); + sha1macro((b & c) | (b & d) | (c & d), 0x8f1bbcdc); + ++round; + } + while (round < 80) + { + w[round] = rol((w[round - 3] ^ w[round - 8] ^ w[round - 14] ^ w[round - 16]), 1); + sha1macro(b ^ c ^ d, 0xca62c1d6); + ++round; + } + +#undef sha1macro + + result[0] += a; + result[1] += b; + result[2] += c; + result[3] += d; + result[4] += e; +} + +static void calc(const void* src, const int bytelength, unsigned char* hash) +{ + int roundPos; + int lastBlockBytes; + int hashByte; + + // Init the result array. + unsigned int result[5] = {0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0}; + + // Cast the void src pointer to be the byte array we can work with. + const unsigned char* sarray = (const unsigned char*)src; + + // The reusable round buffer + unsigned int w[80]; + + // Loop through all complete 64byte blocks. + const int endOfFullBlocks = bytelength - 64; + int endCurrentBlock; + int currentBlock = 0; + + while (currentBlock <= endOfFullBlocks) + { + endCurrentBlock = currentBlock + 64; + + // Init the round buffer with the 64 byte block data. + for (roundPos = 0; currentBlock < endCurrentBlock; currentBlock += 4) + { + // This line will swap endian on big endian and keep endian on little endian. 
+ w[roundPos++] = (unsigned int)sarray[currentBlock + 3] | (((unsigned int)sarray[currentBlock + 2]) << 8) | + (((unsigned int)sarray[currentBlock + 1]) << 16) | + (((unsigned int)sarray[currentBlock]) << 24); + } + innerHash(result, w); + } + + // Handle the last, possibly incomplete, 64-byte block. + endCurrentBlock = bytelength - currentBlock; + clearWBuffert(w); + lastBlockBytes = 0; + for (; lastBlockBytes < endCurrentBlock; ++lastBlockBytes) + { + w[lastBlockBytes >> 2] |= (unsigned int)sarray[lastBlockBytes + currentBlock] + << ((3 - (lastBlockBytes & 3)) << 3); + } + w[lastBlockBytes >> 2] |= 0x80U << ((3 - (lastBlockBytes & 3)) << 3); + if (endCurrentBlock >= 56) + { + innerHash(result, w); + clearWBuffert(w); + } + w[15] = bytelength << 3; + innerHash(result, w); + + // Store hash in result pointer, and make sure we get it in the correct order on both endian models. + for (hashByte = 20; --hashByte >= 0;) + { + hash[hashByte] = (result[hashByte >> 2] >> (((3 - hashByte) & 0x3) << 3)) & 0xff; + } +} + +static SHA1 SHA1_Calculate(const void* src, unsigned int length) +{ + SHA1 hash; + assert((int)length >= 0); + calc(src, length, hash.data); + return hash; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @BASE64: Base-64 encoder +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +static const char* b64_encoding_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +static rmtU32 Base64_CalculateEncodedLength(rmtU32 length) +{ + // ceil(l * 4/3) + return 4 * ((length + 2) / 3); +} + +static void Base64_Encode(const rmtU8* in_bytes, rmtU32 length, rmtU8* out_bytes) +{ + rmtU32 i; + rmtU32 encoded_length; + rmtU32 remaining_bytes; + + rmtU8* optr = out_bytes; + + for (i = 0; i < length;) + { + // Read input 3 bytes at a time, zero-padding past the end + rmtU32 c0 = i < length ? in_bytes[i++] : 0; + rmtU32 c1 = i < length ? in_bytes[i++] : 0; + rmtU32 c2 = i < length ?
in_bytes[i++] : 0; + + // Encode 4 output bytes for every 3 input bytes + rmtU32 triple = (c0 << 0x10) + (c1 << 0x08) + c2; + *optr++ = b64_encoding_table[(triple >> 3 * 6) & 0x3F]; + *optr++ = b64_encoding_table[(triple >> 2 * 6) & 0x3F]; + *optr++ = b64_encoding_table[(triple >> 1 * 6) & 0x3F]; + *optr++ = b64_encoding_table[(triple >> 0 * 6) & 0x3F]; + } + + // Pad output to a multiple of 4 bytes with terminating '=' + encoded_length = Base64_CalculateEncodedLength(length); + remaining_bytes = (3 - ((length + 2) % 3)) - 1; + for (i = 0; i < remaining_bytes; i++) + out_bytes[encoded_length - 1 - i] = '='; + + // Null terminate + out_bytes[encoded_length] = 0; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @MURMURHASH: MurmurHash3 + https://code.google.com/p/smhasher +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. +//----------------------------------------------------------------------------- + +#if RMT_USE_INTERNAL_HASH_FUNCTION + +static rmtU32 rotl32(rmtU32 x, rmtS8 r) +{ + return (x << r) | (x >> (32 - r)); +} + +// Block read - if your platform needs to do endian-swapping, do the conversion here +static rmtU32 getblock32(const rmtU32* p, int i) +{ + rmtU32 result; + const rmtU8* src = ((const rmtU8*)p) + i * (int)sizeof(rmtU32); + memcpy(&result, src, sizeof(result)); + return result; +} + +// Finalization mix - force all bits of a hash block to avalanche +static rmtU32 fmix32(rmtU32 h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +static rmtU32 MurmurHash3_x86_32(const void* key, int len, rmtU32 seed) +{ + const rmtU8* data = (const rmtU8*)key; + const int nblocks = len / 4; + + rmtU32 h1 = seed; + + const rmtU32 c1 = 0xcc9e2d51; + const rmtU32 c2 = 0x1b873593; + + int i; + + const rmtU32* blocks = (const rmtU32*)(data + nblocks * 4); + const rmtU8* tail = (const rmtU8*)(data + nblocks * 4); + + rmtU32 k1 = 0; + + //---------- + // body + + for (i = -nblocks; i; i++) + { + rmtU32 k2 = getblock32(blocks, i); + + k2 *= c1; + k2 = rotl32(k2, 15); + k2 *= c2; + + h1 ^= k2; + h1 = rotl32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + + switch (len & 3) + { + case 3: + k1 ^= tail[2] << 16; // fallthrough + case 2: + k1 ^= tail[1] << 8; // fallthrough + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = rotl32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + return h1; +} + +RMT_API rmtU32 _rmt_HashString32(const char* s, int len, rmtU32 seed) +{ + return MurmurHash3_x86_32(s, len, seed); +} + +#else + #if defined(__cplusplus) + extern "C" + #endif + RMT_API rmtU32 _rmt_HashString32(const char* s, int len, rmtU32 seed); + +#endif // RMT_USE_INTERNAL_HASH_FUNCTION + +/* +------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------ + @WEBSOCKETS: WebSockets +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +enum WebSocketMode +{ + WEBSOCKET_NONE = 0, + WEBSOCKET_TEXT = 1, + WEBSOCKET_BINARY = 2, +}; + +typedef struct +{ + TCPSocket* tcp_socket; + + enum WebSocketMode mode; + + rmtU32 frame_bytes_remaining; + rmtU32 mask_offset; + + union { + rmtU8 mask[4]; + rmtU32 mask_u32; + } data; + +} WebSocket; + +static void WebSocket_Close(WebSocket* web_socket); + +static char* GetField(char* buffer, r_size_t buffer_length, rmtPStr field_name) +{ + char* field = NULL; + char* buffer_end = buffer + buffer_length - 1; + + r_size_t field_length = strnlen_s(field_name, buffer_length); + if (field_length == 0) + return NULL; + + // Search for the start of the field + if (strstr_s(buffer, buffer_length, field_name, field_length, &field) != EOK) + return NULL; + + // Field name is now guaranteed to be in the buffer so it's safe to jump over it without hitting the bounds + field += strlen(field_name); + + // Skip any whitespace after the field name + while (*field == ' ') + { + if (field >= buffer_end) + return NULL; + field++; + } + + return field; +} + +static const char websocket_guid[] = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"; +static const char websocket_response[] = "HTTP/1.1 101 Switching Protocols\r\n" + "Upgrade: websocket\r\n" + "Connection: Upgrade\r\n" + "Sec-WebSocket-Accept: "; + +static rmtError WebSocketHandshake(TCPSocket* tcp_socket, rmtPStr limit_host) +{ + rmtU32 start_ms, now_ms; + + // Parsing scratchpad + char buffer[1024]; + char* buffer_ptr = buffer; + int buffer_len = sizeof(buffer) - 1; + char* buffer_end = buffer + buffer_len; + + char response_buffer[256]; + int response_buffer_len = sizeof(response_buffer) - 1; + + char* version; + char* host; + char* key; + char* key_end; + SHA1 hash; + + assert(tcp_socket != NULL); + + start_ms = msTimer_Get(); + + // Really inefficient way of receiving the handshake data from the browser + // Not really sure how to do this any better, as the termination requirement is \r\n\r\n + while (buffer_ptr - buffer < buffer_len) + { + rmtError error = TCPSocket_Receive(tcp_socket, buffer_ptr, 1, 20); + if (error == RMT_ERROR_SOCKET_RECV_FAILED) + return error; + + // If there's a stall receiving the data, check for a handshake timeout + if (error == RMT_ERROR_SOCKET_RECV_NO_DATA || error == RMT_ERROR_SOCKET_RECV_TIMEOUT) + { + now_ms = msTimer_Get(); + if (now_ms - start_ms > 1000) + return RMT_ERROR_SOCKET_RECV_TIMEOUT; + + continue; + } + + // Just in case new enums are added...
+ assert(error == RMT_ERROR_NONE); + + if (buffer_ptr - buffer >= 4) + { + if (*(buffer_ptr - 3) == '\r' && *(buffer_ptr - 2) == '\n' && *(buffer_ptr - 1) == '\r' && + *(buffer_ptr - 0) == '\n') + break; + } + + buffer_ptr++; + } + *buffer_ptr = 0; + + // HTTP GET instruction + if (memcmp(buffer, "GET", 3) != 0) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_NOT_GET; + + // Look for the version number and verify that it's supported + version = GetField(buffer, buffer_len, "Sec-WebSocket-Version:"); + if (version == NULL) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_VERSION; + if (buffer_end - version < 2 || (version[0] != '8' && (version[0] != '1' || version[1] != '3'))) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_VERSION; + + // Make sure this connection comes from a known host + host = GetField(buffer, buffer_len, "Host:"); + if (host == NULL) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_HOST; + if (limit_host != NULL) + { + r_size_t limit_host_len = strnlen_s(limit_host, 128); + char* found = NULL; + if (strstr_s(host, (r_size_t)(buffer_end - host), limit_host, limit_host_len, &found) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_HOST; + } + + // Look for the key start and null-terminate it within the receive buffer + key = GetField(buffer, buffer_len, "Sec-WebSocket-Key:"); + if (key == NULL) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_KEY; + if (strstr_s(key, (r_size_t)(buffer_end - key), "\r\n", 2, &key_end) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_KEY; + *key_end = 0; + + // Concatenate the browser's key with the WebSocket Protocol GUID and base64 encode + // the hash, to prove to the browser that this is a bona fide WebSocket server + buffer[0] = 0; + if (strncat_s(buffer, buffer_len, key, (r_size_t)(key_end - key)) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL; + if (strncat_s(buffer, buffer_len, websocket_guid, sizeof(websocket_guid)) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL; + hash = SHA1_Calculate(buffer, (rmtU32)strnlen_s(buffer, buffer_len)); + Base64_Encode(hash.data, sizeof(hash.data), (rmtU8*)buffer); + + // Send the response back to the browser with a longer timeout than usual + response_buffer[0] = 0; + if (strncat_s(response_buffer, response_buffer_len, websocket_response, sizeof(websocket_response)) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL; + if (strncat_s(response_buffer, response_buffer_len, buffer, buffer_len) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL; + if (strncat_s(response_buffer, response_buffer_len, "\r\n\r\n", 4) != EOK) + return RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL; + + return TCPSocket_Send(tcp_socket, response_buffer, (rmtU32)strnlen_s(response_buffer, response_buffer_len), 1000); +} + +static rmtError WebSocket_Constructor(WebSocket* web_socket, TCPSocket* tcp_socket) +{ + assert(web_socket != NULL); + web_socket->tcp_socket = tcp_socket; + web_socket->mode = WEBSOCKET_NONE; + web_socket->frame_bytes_remaining = 0; + web_socket->mask_offset = 0; + web_socket->data.mask[0] = 0; + web_socket->data.mask[1] = 0; + web_socket->data.mask[2] = 0; + web_socket->data.mask[3] = 0; + + // Caller can optionally specify which TCP socket to use + if (web_socket->tcp_socket == NULL) + rmtTryNew(TCPSocket, web_socket->tcp_socket); + + return RMT_ERROR_NONE; +} + +static void WebSocket_Destructor(WebSocket* web_socket) +{ + WebSocket_Close(web_socket); +} + +static rmtError WebSocket_RunServer(WebSocket* web_socket, rmtU16 port, rmtBool reuse_open_port, + rmtBool limit_connections_to_localhost, enum
WebSocketMode mode) +{ + // Create the server's listening socket + assert(web_socket != NULL); + web_socket->mode = mode; + return TCPSocket_RunServer(web_socket->tcp_socket, port, reuse_open_port, limit_connections_to_localhost); +} + +static void WebSocket_Close(WebSocket* web_socket) +{ + assert(web_socket != NULL); + rmtDelete(TCPSocket, web_socket->tcp_socket); +} + +static SocketStatus WebSocket_PollStatus(WebSocket* web_socket) +{ + assert(web_socket != NULL); + return TCPSocket_PollStatus(web_socket->tcp_socket); +} + +static rmtError WebSocket_AcceptConnection(WebSocket* web_socket, WebSocket** client_socket) +{ + TCPSocket* tcp_socket = NULL; + + // Is there a waiting connection? + assert(web_socket != NULL); + rmtTry(TCPSocket_AcceptConnection(web_socket->tcp_socket, &tcp_socket)); + if (tcp_socket == NULL) + return RMT_ERROR_NONE; + + // Need a successful handshake between client/server before allowing the connection + // TODO: Specify limit_host + rmtTry(WebSocketHandshake(tcp_socket, NULL)); + + // Allocate and return a new client socket + assert(client_socket != NULL); + rmtTryNew(WebSocket, *client_socket, tcp_socket); + + (*client_socket)->mode = web_socket->mode; + + return RMT_ERROR_NONE; +} + +static void WriteSize(rmtU32 size, rmtU8* dest, rmtU32 dest_size, rmtU32 dest_offset) +{ + int size_size = dest_size - dest_offset; + rmtU32 i; + for (i = 0; i < dest_size; i++) + { + int j = i - dest_offset; + dest[i] = (j < 0) ? 0 : (size >> ((size_size - j - 1) * 8)) & 0xFF; + } +} + +// For send buffers to preallocate +#define WEBSOCKET_MAX_FRAME_HEADER_SIZE 10 + +static void WebSocket_PrepareBuffer(Buffer* buffer) +{ + char empty_frame_header[WEBSOCKET_MAX_FRAME_HEADER_SIZE]; + + assert(buffer != NULL); + + // Reset to start + buffer->bytes_used = 0; + + // Allocate enough space for a maximum-sized frame header + Buffer_Write(buffer, empty_frame_header, sizeof(empty_frame_header)); +} + +static rmtU32 WebSocket_FrameHeaderSize(rmtU32 length) +{ + if (length <= 125) + return 2; + if (length <= 65535) + return 4; + return 10; +} + +static void WebSocket_WriteFrameHeader(WebSocket* web_socket, rmtU8* dest, rmtU32 length) +{ + rmtU8 final_fragment = 0x1 << 7; + rmtU8 frame_type = (rmtU8)web_socket->mode; + + dest[0] = final_fragment | frame_type; + + // Construct the frame header, correctly applying the narrowest size + if (length <= 125) + { + dest[1] = (rmtU8)length; + } + else if (length <= 65535) + { + dest[1] = 126; + WriteSize(length, dest + 2, 2, 0); + } + else + { + dest[1] = 127; + WriteSize(length, dest + 2, 8, 4); + } +} + +static rmtError WebSocket_Send(WebSocket* web_socket, const void* data, rmtU32 length, rmtU32 timeout_ms) +{ + rmtError error; + SocketStatus status; + rmtU32 payload_length, frame_header_size, delta; + + assert(web_socket != NULL); + assert(data != NULL); + + // Can't send if there are socket errors + status = WebSocket_PollStatus(web_socket); + if (status.error_state != RMT_ERROR_NONE) + return status.error_state; + + // Assume space for max frame header has been allocated in the incoming data + payload_length = length - WEBSOCKET_MAX_FRAME_HEADER_SIZE; + frame_header_size = WebSocket_FrameHeaderSize(payload_length); + delta = WEBSOCKET_MAX_FRAME_HEADER_SIZE - frame_header_size; + data = (void*)((rmtU8*)data + delta); + length -= delta; + WebSocket_WriteFrameHeader(web_socket, (rmtU8*)data, payload_length); + + // Send frame header and data together + error = TCPSocket_Send(web_socket->tcp_socket, data, length, timeout_ms); + return error; +} + 
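+// For reference while reading the framing code below, this is the base frame layout from RFC 6455
+// (a sketch of the standard, reproduced here as a reading aid rather than anything this
+// implementation defines):
+//
+//     0                   1                   2                   3
+//     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+//    +-+-+-+-+-------+-+-------------+-------------------------------+
+//    |F|R|R|R| opcode|M| Payload len |    Extended payload length    |
+//    |I|S|S|S|  (4)  |A|     (7)     |            (16/64)            |
+//    |N|V|V|V|       |S|             |   (if payload len==126/127)   |
+//    +-+-+-+-+-------+-+-------------+-------------------------------+
+//
+// So an unmasked 10-byte binary frame from the server is simply {0x82, 0x0A} followed by the
+// payload, which is why WebSocket_WriteFrameHeader above only ever needs 2, 4 or 10 header bytes.
+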
+static rmtError ReceiveFrameHeader(WebSocket* web_socket) +{ + // TODO: Specify infinite timeout? + + rmtU8 msg_header[2] = {0, 0}; + int msg_length, size_bytes_remaining, i; + rmtBool mask_present; + + assert(web_socket != NULL); + + // Get message header + rmtTry(TCPSocket_Receive(web_socket->tcp_socket, msg_header, 2, 20)); + + // Check for WebSocket Protocol disconnect + if (msg_header[0] == 0x88) + return RMT_ERROR_WEBSOCKET_DISCONNECTED; + + // Check that the client isn't sending messages we don't understand + if (msg_header[0] != 0x81 && msg_header[0] != 0x82) + return RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER; + + // Get message length and check to see if it's a marker for a wider length + msg_length = msg_header[1] & 0x7F; + size_bytes_remaining = 0; + switch (msg_length) + { + case 126: + size_bytes_remaining = 2; + break; + case 127: + size_bytes_remaining = 8; + break; + } + + if (size_bytes_remaining > 0) + { + // Receive the wider bytes of the length + rmtU8 size_bytes[8]; + rmtTry(TCPSocket_Receive(web_socket->tcp_socket, size_bytes, size_bytes_remaining, 20)); + + // Calculate new length, MSB first + msg_length = 0; + for (i = 0; i < size_bytes_remaining; i++) + msg_length |= size_bytes[i] << ((size_bytes_remaining - 1 - i) * 8); + } + + // Receive any message data masks + mask_present = (msg_header[1] & 0x80) != 0 ? RMT_TRUE : RMT_FALSE; + if (mask_present) + { + rmtTry(TCPSocket_Receive(web_socket->tcp_socket, web_socket->data.mask, 4, 20)); + } + + web_socket->frame_bytes_remaining = msg_length; + web_socket->mask_offset = 0; + + return RMT_ERROR_NONE; +} + +static rmtError WebSocket_Receive(WebSocket* web_socket, void* data, rmtU32* msg_len, rmtU32 length, rmtU32 timeout_ms) +{ + SocketStatus status; + char* cur_data; + char* end_data; + rmtU32 start_ms; + rmtU32 now_ms; + rmtU32 bytes_to_read; + + assert(web_socket != NULL); + + // Can't read with any socket errors + status = WebSocket_PollStatus(web_socket); + if (status.error_state != RMT_ERROR_NONE) + { + return status.error_state; + } + + cur_data = (char*)data; + end_data = cur_data + length; + + start_ms = msTimer_Get(); + while (cur_data < end_data) + { + // Get next WebSocket frame if we've run out of data to read from the socket + if (web_socket->frame_bytes_remaining == 0) + { + rmtTry(ReceiveFrameHeader(web_socket)); + + // Set output message length only on initial receive + if (msg_len != NULL) + { + *msg_len = web_socket->frame_bytes_remaining; + } + } + + { + rmtError error; + + // Read as much required data as possible + bytes_to_read = web_socket->frame_bytes_remaining < length ? 
web_socket->frame_bytes_remaining : length;
+            error = TCPSocket_Receive(web_socket->tcp_socket, cur_data, bytes_to_read, 20);
+            if (error == RMT_ERROR_SOCKET_RECV_FAILED)
+            {
+                return error;
+            }
+
+            // If there's a stall receiving the data, check for timeout
+            if (error == RMT_ERROR_SOCKET_RECV_NO_DATA || error == RMT_ERROR_SOCKET_RECV_TIMEOUT)
+            {
+                now_ms = msTimer_Get();
+                if (now_ms - start_ms > timeout_ms)
+                {
+                    return RMT_ERROR_SOCKET_RECV_TIMEOUT;
+                }
+                continue;
+            }
+        }
+
+        // Apply data mask
+        if (web_socket->data.mask_u32 != 0)
+        {
+            rmtU32 i;
+            for (i = 0; i < bytes_to_read; i++)
+            {
+                *((rmtU8*)cur_data + i) ^= web_socket->data.mask[web_socket->mask_offset & 3];
+                web_socket->mask_offset++;
+            }
+        }
+
+        cur_data += bytes_to_read;
+        web_socket->frame_bytes_remaining -= bytes_to_read;
+    }
+
+    return RMT_ERROR_NONE;
+}
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @MESSAGEQ: Multiple producer, single consumer message queue
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+typedef enum MessageID
+{
+    MsgID_NotReady,
+    MsgID_AddToStringTable,
+    MsgID_LogText,
+    MsgID_SampleTree,
+    MsgID_ProcessorThreads,
+    MsgID_None,
+    MsgID_PropertySnapshot,
+    MsgID_Force32Bits = 0xFFFFFFFF,
+} MessageID;
+
+typedef struct Message
+{
+    MessageID id;
+
+    rmtU32 payload_size;
+
+    // For telling which thread the message came from in the debugger
+    struct ThreadProfiler* threadProfiler;
+
+    rmtU8 payload[1];
+} Message;
+
+// Multiple producer, single consumer message queue that uses its own data buffer
+// to store the message data.
+typedef struct rmtMessageQueue
+{
+    rmtU32 size;
+
+    // The physical address of this data buffer is pointed to by two sequential
+    // virtual memory pages, allowing automatic wrap-around of any reads or writes
+    // that exceed the limits of the buffer.
+    VirtualMirrorBuffer* data;
+
+    // Read/write positions never wrap, allowing trivial overflow checks
+    // and easier debugging
+    rmtAtomicU32 read_pos;
+    rmtAtomicU32 write_pos;
+
+} rmtMessageQueue;
+
+static rmtError rmtMessageQueue_Constructor(rmtMessageQueue* queue, rmtU32 size)
+{
+    assert(queue != NULL);
+
+    // Set defaults
+    queue->size = 0;
+    queue->data = NULL;
+    queue->read_pos = 0;
+    queue->write_pos = 0;
+
+    rmtTryNew(VirtualMirrorBuffer, queue->data, size, 10);
+
+    // The mirror buffer needs to be page-aligned and will change the requested
+    // size to match that.
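+    // (Illustrative example, assuming 4KiB pages on the host: a requested size of 3000 bytes
+    // would be rounded up to a 4096-byte queue. Note that the index masking in
+    // rmtMessageQueue_AllocMessage below relies on the final size being a power of two.)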
+    queue->size = queue->data->size;
+
+    // Set the entire buffer to the not-ready message
+    memset(queue->data->ptr, MsgID_NotReady, queue->size);
+
+    return RMT_ERROR_NONE;
+}
+
+static void rmtMessageQueue_Destructor(rmtMessageQueue* queue)
+{
+    assert(queue != NULL);
+    rmtDelete(VirtualMirrorBuffer, queue->data);
+}
+
+static rmtU32 rmtMessageQueue_SizeForPayload(rmtU32 payload_size)
+{
+    // Add message header and align for ARM platforms
+    rmtU32 size = sizeof(Message) + payload_size;
+#if defined(RMT_ARCH_64BIT)
+    size = (size + 7) & ~7U;
+#else
+    size = (size + 3) & ~3U;
+#endif
+    return size;
+}
+
+static Message* rmtMessageQueue_AllocMessage(rmtMessageQueue* queue, rmtU32 payload_size,
+                                             struct ThreadProfiler* thread_profiler)
+{
+    Message* msg;
+
+    rmtU32 write_size = rmtMessageQueue_SizeForPayload(payload_size);
+
+    assert(queue != NULL);
+
+    for (;;)
+    {
+        // Check for potential overflow
+        // Order of loads means allocation failure can happen when enough space has just been freed
+        // However, incorrect overflows are not possible
+        rmtU32 s = queue->size;
+        rmtU32 w = LoadAcquire(&queue->write_pos);
+        rmtU32 r = LoadAcquire(&queue->read_pos);
+        if ((int)(w - r) > ((int)(s - write_size)))
+            return NULL;
+
+        // Point to the newly allocated space
+        msg = (Message*)(queue->data->ptr + (w & (s - 1)));
+
+        // Increment the write position, leaving the loop if this is the thread that succeeded
+        if (AtomicCompareAndSwapU32(&queue->write_pos, w, w + write_size) == RMT_TRUE)
+        {
+            // Safe to set payload size after thread claims ownership of this allocated range
+            msg->payload_size = payload_size;
+            msg->threadProfiler = thread_profiler;
+            break;
+        }
+    }
+
+    return msg;
+}
+
+static void rmtMessageQueue_CommitMessage(Message* message, MessageID id)
+{
+    MessageID r;
+    assert(message != NULL);
+
+    // Setting the message ID signals to the consumer that the message is ready
+    r = (MessageID)LoadAcquire((rmtAtomicU32*)&message->id);
+    RMT_UNREFERENCED_PARAMETER(r);
+    assert(r == MsgID_NotReady);
+    StoreRelease((rmtAtomicU32*)&message->id, id);
+}
+
+Message* rmtMessageQueue_PeekNextMessage(rmtMessageQueue* queue)
+{
+    Message* ptr;
+    rmtU32 r, w;
+    MessageID id;
+
+    assert(queue != NULL);
+
+    // First check that there are bytes queued
+    w = LoadAcquire(&queue->write_pos);
+    r = queue->read_pos;
+    if (w - r == 0)
+        return NULL;
+
+    // Messages are in the queue but may not have been committed yet
+    // Messages behind this one may have been committed but they're not reachable until
+    // the next one in the queue is ready.
+    r = r & (queue->size - 1);
+    ptr = (Message*)(queue->data->ptr + r);
+    id = (MessageID)LoadAcquire((rmtAtomicU32*)&ptr->id);
+    if (id != MsgID_NotReady)
+        return ptr;
+
+    return NULL;
+}
+
+static void rmtMessageQueue_ConsumeNextMessage(rmtMessageQueue* queue, Message* message)
+{
+    rmtU32 message_size, read_pos;
+
+    assert(queue != NULL);
+    assert(message != NULL);
+
+    // Setting the message ID to "not ready" serves as a marker to the consumer that even though
+    // space has been allocated for a message, the message isn't ready to be consumed
+    // yet.
+    //
+    // We can't do that when allocating the message because multiple threads will be fighting for
+    // the same location. Instead, clear out any messages just read by the consumer before advancing
+    // the read position so that a winning thread's allocation will inherit the "not ready" state.
+    //
+    // This costs some write bandwidth and has the potential to flush cache to other cores.
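+    //
+    // Concretely (hypothetical numbers): consuming a 24-byte message memsets those 24 bytes back
+    // to MsgID_NotReady before read_pos advances, so whichever producer next wins the CAS in
+    // rmtMessageQueue_AllocMessage receives a range that already reads as not-ready.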
+ message_size = rmtMessageQueue_SizeForPayload(message->payload_size); + memset(message, MsgID_NotReady, message_size); + + // Advance read position + read_pos = queue->read_pos + message_size; + StoreRelease(&queue->read_pos, read_pos); +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @NETWORK: Network Server +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef rmtError (*Server_ReceiveHandler)(void*, char*, rmtU32); + +typedef struct +{ + WebSocket* listen_socket; + + WebSocket* client_socket; + + rmtU32 last_ping_time; + + rmtU16 port; + + rmtBool reuse_open_port; + rmtBool limit_connections_to_localhost; + + // A dynamically-sized buffer used for binary-encoding messages and sending to the client + Buffer* bin_buf; + + // Handler for receiving messages from the client + Server_ReceiveHandler receive_handler; + void* receive_handler_context; +} Server; + +static rmtError Server_CreateListenSocket(Server* server, rmtU16 port, rmtBool reuse_open_port, + rmtBool limit_connections_to_localhost) +{ + rmtTryNew(WebSocket, server->listen_socket, NULL); + rmtTry(WebSocket_RunServer(server->listen_socket, port, reuse_open_port, limit_connections_to_localhost, WEBSOCKET_BINARY)); + return RMT_ERROR_NONE; +} + +static rmtError Server_Constructor(Server* server, rmtU16 port, rmtBool reuse_open_port, + rmtBool limit_connections_to_localhost) +{ + assert(server != NULL); + server->listen_socket = NULL; + server->client_socket = NULL; + server->last_ping_time = 0; + server->port = port; + server->reuse_open_port = reuse_open_port; + server->limit_connections_to_localhost = limit_connections_to_localhost; + server->bin_buf = NULL; + server->receive_handler = NULL; + server->receive_handler_context = NULL; + + // Create the binary serialisation buffer + rmtTryNew(Buffer, server->bin_buf, 4096); + + // Create the listening WebSocket + return Server_CreateListenSocket(server, port, reuse_open_port, limit_connections_to_localhost); +} + +static void Server_Destructor(Server* server) +{ + assert(server != NULL); + rmtDelete(WebSocket, server->client_socket); + rmtDelete(WebSocket, server->listen_socket); + rmtDelete(Buffer, server->bin_buf); +} + +static rmtBool Server_IsClientConnected(Server* server) +{ + assert(server != NULL); + return server->client_socket != NULL ? 
RMT_TRUE : RMT_FALSE; +} + +static void Server_DisconnectClient(Server* server) +{ + WebSocket* client_socket; + + assert(server != NULL); + + // NULL the variable before destroying the socket + client_socket = server->client_socket; + server->client_socket = NULL; + CompilerWriteFence(); + rmtDelete(WebSocket, client_socket); +} + +static rmtError Server_Send(Server* server, const void* data, rmtU32 length, rmtU32 timeout) +{ + assert(server != NULL); + if (Server_IsClientConnected(server)) + { + rmtError error = WebSocket_Send(server->client_socket, data, length, timeout); + if (error == RMT_ERROR_SOCKET_SEND_FAIL) + Server_DisconnectClient(server); + + return error; + } + + return RMT_ERROR_NONE; +} + +static rmtError Server_ReceiveMessage(Server* server, char message_first_byte, rmtU32 message_length) +{ + char message_data[1024]; + + // Check for potential message data overflow + if (message_length >= sizeof(message_data) - 1) + { + rmt_LogText("Ignoring console input bigger than internal receive buffer (1024 bytes)"); + return RMT_ERROR_NONE; + } + + // Receive the rest of the message + message_data[0] = message_first_byte; + rmtTry(WebSocket_Receive(server->client_socket, message_data + 1, NULL, message_length - 1, 100)); + message_data[message_length] = 0; + + // Each message must have a descriptive 4 byte header + if (message_length < 4) + return RMT_ERROR_NONE; + + // Dispatch to handler + if (server->receive_handler) + rmtTry(server->receive_handler(server->receive_handler_context, message_data, message_length)); + + return RMT_ERROR_NONE; +} + +static rmtError bin_MessageHeader(Buffer* buffer, const char* id, rmtU32* out_write_start_offset) +{ + // Record where the header starts before writing it + *out_write_start_offset = buffer->bytes_used; + rmtTry(Buffer_Write(buffer, (void*)id, 4)); + rmtTry(Buffer_Write(buffer, (void*)" ", 4)); + return RMT_ERROR_NONE; +} + +static rmtError bin_MessageFooter(Buffer* buffer, rmtU32 write_start_offset) +{ + // Align message size to 32-bits so that the viewer can alias float arrays within log files + rmtTry(Buffer_AlignedPad(buffer, write_start_offset)); + + // Patch message size, including padding at the end + U32ToByteArray(buffer->data + write_start_offset + 4, (buffer->bytes_used - write_start_offset)); + + return RMT_ERROR_NONE; +} + +static void Server_Update(Server* server) +{ + rmtU32 cur_time; + + assert(server != NULL); + + // Recreate the listening socket if it's been destroyed earlier + if (server->listen_socket == NULL) + Server_CreateListenSocket(server, server->port, server->reuse_open_port, + server->limit_connections_to_localhost); + + if (server->listen_socket != NULL && server->client_socket == NULL) + { + // Accept connections as long as there is no client connected + WebSocket* client_socket = NULL; + rmtError error = WebSocket_AcceptConnection(server->listen_socket, &client_socket); + if (error == RMT_ERROR_NONE) + { + server->client_socket = client_socket; + } + else + { + // Destroy the listen socket on failure to accept + // It will get recreated in another update + rmtDelete(WebSocket, server->listen_socket); + } + } + + else + { + // Loop checking for incoming messages + for (;;) + { + // Inspect first byte to see if a message is there + char message_first_byte; + rmtU32 message_length; + rmtError error = WebSocket_Receive(server->client_socket, &message_first_byte, &message_length, 1, 0); + if (error == RMT_ERROR_NONE) + { + // Parse remaining message + error = Server_ReceiveMessage(server, message_first_byte, 
message_length); + if (error != RMT_ERROR_NONE) + { + Server_DisconnectClient(server); + break; + } + + // Check for more... + continue; + } + + // Passable errors... + if (error == RMT_ERROR_SOCKET_RECV_NO_DATA) + { + // No data available + break; + } + + if (error == RMT_ERROR_SOCKET_RECV_TIMEOUT) + { + // Data not available yet, can afford to ignore as we're only reading the first byte + break; + } + + // Anything else is an error that may have closed the connection + Server_DisconnectClient(server); + break; + } + } + + // Send pings to the client every second + cur_time = msTimer_Get(); + if (cur_time - server->last_ping_time > 1000) + { + Buffer* bin_buf = server->bin_buf; + rmtU32 write_start_offset; + WebSocket_PrepareBuffer(bin_buf); + bin_MessageHeader(bin_buf, "PING", &write_start_offset); + bin_MessageFooter(bin_buf, write_start_offset); + Server_Send(server, bin_buf->data, bin_buf->bytes_used, 10); + server->last_ping_time = cur_time; + } +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @SAMPLE: Base Sample Description for CPU by default +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#define SAMPLE_NAME_LEN 128 + +typedef struct Sample +{ + // Inherit so that samples can be quickly allocated + ObjectLink Link; + + enum rmtSampleType type; + + // Hash generated from sample name + rmtU32 name_hash; + + // Unique, persistent ID among all samples + rmtU32 unique_id; + + // RGB8 unique colour generated from the unique ID + rmtU8 uniqueColour[3]; + + // Links to related samples in the tree + struct Sample* parent; + struct Sample* first_child; + struct Sample* last_child; + struct Sample* next_sibling; + + // Keep track of child count to distinguish from repeated calls to the same function at the same stack level + // This is also mixed with the callstack hash to allow consistent addressing of any point in the tree + rmtU32 nb_children; + + // Sample end points and length in microseconds + rmtU64 us_start; + rmtU64 us_end; + rmtU64 us_length; + + // Total sampled length of all children + rmtU64 us_sampled_length; + + // If this is a GPU sample, when the sample was issued on the GPU + rmtU64 usGpuIssueOnCpu; + + // Number of times this sample was used in a call in aggregate mode, 1 otherwise + rmtU32 call_count; + + // Current and maximum sample recursion depths + rmtU16 recurse_depth; + rmtU16 max_recurse_depth; + +} Sample; + +static rmtError Sample_Constructor(Sample* sample) +{ + assert(sample != NULL); + + ObjectLink_Constructor((ObjectLink*)sample); + + sample->type = RMT_SampleType_CPU; + sample->name_hash = 0; + sample->unique_id = 0; + sample->uniqueColour[0] = 0; + sample->uniqueColour[1] = 0; + sample->uniqueColour[2] = 0; + sample->parent = NULL; + sample->first_child = NULL; + sample->last_child = NULL; + sample->next_sibling = NULL; + sample->nb_children = 0; + sample->us_start = 0; + sample->us_end = 0; + sample->us_length = 0; + sample->us_sampled_length = 0; + sample->usGpuIssueOnCpu = 0; + sample->call_count = 0; + sample->recurse_depth = 0; + sample->max_recurse_depth = 0; + + return RMT_ERROR_NONE; +} + +static void Sample_Destructor(Sample* sample) +{ + 
RMT_UNREFERENCED_PARAMETER(sample); +} + +static void Sample_Prepare(Sample* sample, rmtU32 name_hash, Sample* parent) +{ + sample->name_hash = name_hash; + sample->unique_id = 0; + sample->parent = parent; + sample->first_child = NULL; + sample->last_child = NULL; + sample->next_sibling = NULL; + sample->nb_children = 0; + sample->us_start = 0; + sample->us_end = 0; + sample->us_length = 0; + sample->us_sampled_length = 0; + sample->usGpuIssueOnCpu = 0; + sample->call_count = 1; + sample->recurse_depth = 0; + sample->max_recurse_depth = 0; +} + +static void Sample_Close(Sample* sample, rmtS64 us_end) +{ + // Aggregate samples use us_end to store start so that us_start is preserved + rmtS64 us_length = 0; + if (sample->call_count > 1 && sample->max_recurse_depth == 0) + { + us_length = maxS64(us_end - sample->us_end, 0); + } + else + { + us_length = maxS64(us_end - sample->us_start, 0); + } + + sample->us_length += us_length; + + // Sum length on the parent to track un-sampled time in the parent + if (sample->parent != NULL) + { + sample->parent->us_sampled_length += us_length; + } +} + +static void Sample_CopyState(Sample* dst_sample, const Sample* src_sample) +{ + // Copy fields that don't override destination allocator links or transfer source sample tree positioning + // Also ignoring uniqueColour as that's calculated in the Remotery thread + dst_sample->type = src_sample->type; + dst_sample->name_hash = src_sample->name_hash; + dst_sample->unique_id = src_sample->unique_id; + dst_sample->nb_children = src_sample->nb_children; + dst_sample->us_start = src_sample->us_start; + dst_sample->us_end = src_sample->us_end; + dst_sample->us_length = src_sample->us_length; + dst_sample->us_sampled_length = src_sample->us_sampled_length; + dst_sample->usGpuIssueOnCpu = src_sample->usGpuIssueOnCpu; + dst_sample->call_count = src_sample->call_count; + dst_sample->recurse_depth = src_sample->recurse_depth; + dst_sample->max_recurse_depth = src_sample->max_recurse_depth; + + // Prepare empty tree links + dst_sample->parent = NULL; + dst_sample->first_child = NULL; + dst_sample->last_child = NULL; + dst_sample->next_sibling = NULL; +} + +static rmtError bin_SampleArray(Buffer* buffer, Sample* parent_sample, rmtU8 depth); + +static rmtError bin_Sample(Buffer* buffer, Sample* sample, rmtU8 depth) +{ + assert(sample != NULL); + + rmtTry(Buffer_WriteU32(buffer, sample->name_hash)); + rmtTry(Buffer_WriteU32(buffer, sample->unique_id)); + rmtTry(Buffer_Write(buffer, sample->uniqueColour, 3)); + rmtTry(Buffer_Write(buffer, &depth, 1)); + rmtTry(Buffer_WriteU64(buffer, sample->us_start)); + rmtTry(Buffer_WriteU64(buffer, sample->us_length)); + rmtTry(Buffer_WriteU64(buffer, maxS64(sample->us_length - sample->us_sampled_length, 0))); + rmtTry(Buffer_WriteU64(buffer, sample->usGpuIssueOnCpu)); + rmtTry(Buffer_WriteU32(buffer, sample->call_count)); + rmtTry(Buffer_WriteU32(buffer, sample->max_recurse_depth)); + rmtTry(bin_SampleArray(buffer, sample, depth + 1)); + + return RMT_ERROR_NONE; +} + +static rmtError bin_SampleArray(Buffer* buffer, Sample* parent_sample, rmtU8 depth) +{ + Sample* sample; + + rmtTry(Buffer_WriteU32(buffer, parent_sample->nb_children)); + for (sample = parent_sample->first_child; sample != NULL; sample = sample->next_sibling) + rmtTry(bin_Sample(buffer, sample, depth)); + + return RMT_ERROR_NONE; +} + +/* +------------------------------------------------------------------------------------------------------------------------ 
+------------------------------------------------------------------------------------------------------------------------ + @SAMPLETREE: A tree of samples with their allocator +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef struct SampleTree +{ + // Allocator for all samples + ObjectAllocator* allocator; + + // Root sample for all samples created by this thread + Sample* root; + + // Most recently pushed sample + Sample* currentParent; + + // Last time this sample tree was completed and sent to listeners, for stall detection + rmtAtomicU32 msLastTreeSendTime; + + // Lightweight flag, changed with release/acquire semantics to inform the stall detector the state of the tree is unreliable + rmtAtomicU32 treeBeingModified; + + // Send this popped sample to the log/viewer on close? + Sample* sendSampleOnClose; + +} SampleTree; + +// Notify tree watchers that its structure is in the process of being changed +#define ModifySampleTree(tree, statements) \ + StoreRelease(&tree->treeBeingModified, 1); \ + statements; \ + StoreRelease(&tree->treeBeingModified, 0); + +static rmtError SampleTree_Constructor(SampleTree* tree, rmtU32 sample_size, ObjConstructor constructor, + ObjDestructor destructor) +{ + assert(tree != NULL); + + tree->allocator = NULL; + tree->root = NULL; + tree->currentParent = NULL; + StoreRelease(&tree->msLastTreeSendTime, 0); + StoreRelease(&tree->treeBeingModified, 0); + tree->sendSampleOnClose = NULL; + + // Create the sample allocator + rmtTryNew(ObjectAllocator, tree->allocator, sample_size, constructor, destructor); + + // Create a root sample that's around for the lifetime of the thread + rmtTry(ObjectAllocator_Alloc(tree->allocator, (void**)&tree->root)); + Sample_Prepare(tree->root, 0, NULL); + tree->currentParent = tree->root; + + return RMT_ERROR_NONE; +} + +static void SampleTree_Destructor(SampleTree* tree) +{ + assert(tree != NULL); + + if (tree->root != NULL) + { + ObjectAllocator_Free(tree->allocator, tree->root); + tree->root = NULL; + } + + rmtDelete(ObjectAllocator, tree->allocator); +} + +static rmtU32 HashCombine(rmtU32 hash_a, rmtU32 hash_b) +{ + // A sequence of 32 uniformly random bits so that each bit of the combined hash is changed on application + // Derived from the golden ratio: UINT_MAX / ((1 + sqrt(5)) / 2) + // In reality it's just an arbitrary value which happens to work well, avoiding mapping all zeros to zeros. 
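+    // (Concretely: floor(2^32 / 1.6180339887...) = 2654435769 = 0x9E3779B9, the same constant
+    // that appears in Fibonacci hashing and Boost's hash_combine.)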
+ // http://burtleburtle.net/bob/hash/doobs.html + static rmtU32 random_bits = 0x9E3779B9; + hash_a ^= hash_b + random_bits + (hash_a << 6) + (hash_a >> 2); + return hash_a; +} + +static rmtError SampleTree_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample) +{ + Sample* parent; + rmtU32 unique_id; + + // As each tree has a root sample node allocated, a parent must always be present + assert(tree != NULL); + assert(tree->currentParent != NULL); + parent = tree->currentParent; + + // Assume no flags is the common case and predicate branch checks + if (flags != 0) + { + // Check root status + if ((flags & RMTSF_Root) != 0) + { + assert(parent->parent == NULL); + } + + if ((flags & RMTSF_Aggregate) != 0) + { + // Linear search for previous instance of this sample name + Sample* sibling; + for (sibling = parent->first_child; sibling != NULL; sibling = sibling->next_sibling) + { + if (sibling->name_hash == name_hash) + { + tree->currentParent = sibling; + sibling->call_count++; + *sample = sibling; + return RMT_ERROR_NONE; + } + } + } + + // Collapse sample on recursion + if ((flags & RMTSF_Recursive) != 0 && parent->name_hash == name_hash) + { + parent->recurse_depth++; + parent->max_recurse_depth = maxU16(parent->max_recurse_depth, parent->recurse_depth); + parent->call_count++; + *sample = parent; + return RMT_ERROR_RECURSIVE_SAMPLE; + } + + // Allocate a new sample for subsequent flag checks to reference + rmtTry(ObjectAllocator_Alloc(tree->allocator, (void**)sample)); + Sample_Prepare(*sample, name_hash, parent); + + // Check for sending this sample on close + if ((flags & RMTSF_SendOnClose) != 0) + { + assert(tree->currentParent != NULL); + assert(tree->sendSampleOnClose == NULL); + tree->sendSampleOnClose = *sample; + } + } + + else + { + // Allocate a new sample + rmtTry(ObjectAllocator_Alloc(tree->allocator, (void**)sample)); + Sample_Prepare(*sample, name_hash, parent); + } + + // Generate a unique ID for this sample in the tree + unique_id = parent->unique_id; + unique_id = HashCombine(unique_id, (*sample)->name_hash); + unique_id = HashCombine(unique_id, parent->nb_children); + (*sample)->unique_id = unique_id; + + // Add sample to its parent + parent->nb_children++; + if (parent->first_child == NULL) + { + parent->first_child = *sample; + parent->last_child = *sample; + } + else + { + assert(parent->last_child != NULL); + parent->last_child->next_sibling = *sample; + parent->last_child = *sample; + } + + // Make this sample the new parent of any newly created samples + tree->currentParent = *sample; + + return RMT_ERROR_NONE; +} + +static void SampleTree_Pop(SampleTree* tree, Sample* sample) +{ + assert(tree != NULL); + assert(sample != NULL); + assert(sample != tree->root); + tree->currentParent = sample->parent; +} + +static ObjectLink* FlattenSamples(Sample* sample, rmtU32* nb_samples) +{ + Sample* child; + ObjectLink* cur_link = &sample->Link; + + assert(sample != NULL); + assert(nb_samples != NULL); + + *nb_samples += 1; + sample->Link.next = (ObjectLink*)sample->first_child; + + // Link all children together + for (child = sample->first_child; child != NULL; child = child->next_sibling) + { + ObjectLink* last_link = FlattenSamples(child, nb_samples); + last_link->next = (ObjectLink*)child->next_sibling; + cur_link = last_link; + } + + // Clear child info + sample->first_child = NULL; + sample->last_child = NULL; + sample->nb_children = 0; + + return cur_link; +} + +static void FreeSamples(Sample* sample, ObjectAllocator* allocator) +{ + // Chain all 
samples together in a flat list + rmtU32 nb_cleared_samples = 0; + ObjectLink* last_link = FlattenSamples(sample, &nb_cleared_samples); + + // Release the complete sample memory range + if (sample->Link.next != NULL) + { + ObjectAllocator_FreeRange(allocator, sample, last_link, nb_cleared_samples); + } + else + { + ObjectAllocator_Free(allocator, sample); + } +} + +static rmtError SampleTree_CopySample(Sample** out_dst_sample, Sample* dst_parent_sample, ObjectAllocator* allocator, const Sample* src_sample) +{ + Sample* src_child; + + // Allocate a copy of the sample + Sample* dst_sample; + rmtTry(ObjectAllocator_Alloc(allocator, (void**)&dst_sample)); + Sample_CopyState(dst_sample, src_sample); + + // Link the newly created/copied sample to its parent + // Note that metrics including nb_children have already been copied by the Sample_CopyState call + if (dst_parent_sample != NULL) + { + if (dst_parent_sample->first_child == NULL) + { + dst_parent_sample->first_child = dst_sample; + dst_parent_sample->last_child = dst_sample; + } + else + { + assert(dst_parent_sample->last_child != NULL); + dst_parent_sample->last_child->next_sibling = dst_sample; + dst_parent_sample->last_child = dst_sample; + } + } + + // Copy all children + for (src_child = src_sample->first_child; src_child != NULL; src_child = src_child->next_sibling) + { + Sample* dst_child; + rmtTry(SampleTree_CopySample(&dst_child, dst_sample, allocator, src_child)); + } + + *out_dst_sample = dst_sample; + + return RMT_ERROR_NONE; +} + +static rmtError SampleTree_Copy(SampleTree* dst_tree, const SampleTree* src_tree) +{ + // Sample trees are allocated at startup and their allocators are persistent for the lifetime of the Remotery object. + // It's safe to reference the allocator and use it for sample lifetime. 
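+    // One consequence worth noting: the copy shares the source tree's allocator, so a copied
+    // tree must be released back to that same allocator with FreeSamples (as ThreadProfiler_Pop
+    // does for partial trees below) rather than destroyed through SampleTree_Destructor.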
+    ObjectAllocator* allocator = src_tree->allocator;
+    dst_tree->allocator = allocator;
+
+    // Copy from the root
+    rmtTry(SampleTree_CopySample(&dst_tree->root, NULL, allocator, src_tree->root));
+    dst_tree->currentParent = dst_tree->root;
+
+    return RMT_ERROR_NONE;
+}
+
+typedef struct Msg_SampleTree
+{
+    Sample* rootSample;
+
+    ObjectAllocator* allocator;
+
+    rmtPStr threadName;
+
+    // Data specific to the sample tree that downstream users can inspect/use
+    rmtU32 userData;
+
+    rmtBool partialTree;
+} Msg_SampleTree;
+
+static void QueueSampleTree(rmtMessageQueue* queue, Sample* sample, ObjectAllocator* allocator, rmtPStr thread_name, rmtU32 user_data,
+                            struct ThreadProfiler* thread_profiler, rmtBool partial_tree)
+{
+    Msg_SampleTree* payload;
+
+    // Attempt to allocate a message for sending the tree to the viewer
+    Message* message = rmtMessageQueue_AllocMessage(queue, sizeof(Msg_SampleTree), thread_profiler);
+    if (message == NULL)
+    {
+        // Discard tree samples on failure
+        FreeSamples(sample, allocator);
+        return;
+    }
+
+    // Populate and commit
+    payload = (Msg_SampleTree*)message->payload;
+    payload->rootSample = sample;
+    payload->allocator = allocator;
+    payload->threadName = thread_name;
+    payload->userData = user_data;
+    payload->partialTree = partial_tree;
+    rmtMessageQueue_CommitMessage(message, MsgID_SampleTree);
+}
+
+typedef struct Msg_AddToStringTable
+{
+    rmtU32 hash;
+    rmtU32 length;
+} Msg_AddToStringTable;
+
+static rmtBool QueueAddToStringTable(rmtMessageQueue* queue, rmtU32 hash, const char* string, size_t length, struct ThreadProfiler* thread_profiler)
+{
+    Msg_AddToStringTable* payload;
+
+    // Attempt to allocate a message on the queue
+    size_t nb_string_bytes = length + 1;
+    Message* message = rmtMessageQueue_AllocMessage(queue, sizeof(Msg_AddToStringTable) + nb_string_bytes, thread_profiler);
+    if (message == NULL)
+    {
+        return RMT_FALSE;
+    }
+
+    // Populate and commit
+    payload = (Msg_AddToStringTable*)message->payload;
+    payload->hash = hash;
+    payload->length = length;
+    memcpy(payload + 1, string, nb_string_bytes);
+    rmtMessageQueue_CommitMessage(message, MsgID_AddToStringTable);
+
+    return RMT_TRUE;
+}
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @TPROFILER: Thread Profiler data, storing both sampling and instrumentation results
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+#if RMT_USE_D3D11
+typedef struct D3D11 D3D11;
+static rmtError D3D11_Create(D3D11** d3d11);
+static void D3D11_Destructor(D3D11* d3d11);
+#endif
+
+#if RMT_USE_D3D12
+typedef struct D3D12ThreadData D3D12ThreadData;
+static rmtError D3D12ThreadData_Create(D3D12ThreadData** d3d12);
+static void D3D12ThreadData_Destructor(D3D12ThreadData* d3d12);
+#endif
+
+#if RMT_USE_VULKAN
+typedef struct VulkanThreadData VulkanThreadData;
+static rmtError VulkanThreadData_Create(VulkanThreadData** vulkan);
+static void VulkanThreadData_Destructor(VulkanThreadData* vulkan);
+#endif
+
+typedef struct ThreadProfiler
+{
+    // Storage for backing up initial register values when modifying a thread's context
+    rmtU64 registerBackup0; // 0
+    rmtU64 registerBackup1; // 8
+    rmtU64 registerBackup2; // 16
+
+    //
Used to schedule callbacks taking into account some threads may be sleeping + rmtAtomicS32 nbSamplesWithoutCallback; // 24 + + // Index of the processor the thread was last seen running on + rmtU32 processorIndex; // 28 + rmtU32 lastProcessorIndex; + + // OS thread ID/handle + rmtThreadId threadId; + rmtThreadHandle threadHandle; + + // Thread name stored for sending to the viewer + char threadName[64]; + rmtU32 threadNameHash; + + // Store a unique sample tree for each type + SampleTree* sampleTrees[RMT_SampleType_Count]; + +#if RMT_USE_D3D11 + D3D11* d3d11; +#endif + +#if RMT_USE_D3D12 + D3D12ThreadData* d3d12ThreadData; +#endif + +#if RMT_USE_VULKAN + VulkanThreadData* vulkanThreadData; +#endif +} ThreadProfiler; + +static rmtError ThreadProfiler_Constructor(rmtMessageQueue* mq_to_rmt, ThreadProfiler* thread_profiler, rmtThreadId thread_id) +{ + rmtU32 name_length; + + // Set defaults + thread_profiler->nbSamplesWithoutCallback = 0; + thread_profiler->processorIndex = (rmtU32)-1; + thread_profiler->lastProcessorIndex = (rmtU32)-1; + thread_profiler->threadId = thread_id; + memset(thread_profiler->sampleTrees, 0, sizeof(thread_profiler->sampleTrees)); + +#if RMT_USE_D3D11 + thread_profiler->d3d11 = NULL; +#endif + +#if RMT_USE_D3D12 + thread_profiler->d3d12ThreadData = NULL; +#endif + +#if RMT_USE_VULKAN + thread_profiler->vulkanThreadData = NULL; +#endif + + // Pre-open the thread handle + rmtTry(rmtOpenThreadHandle(thread_id, &thread_profiler->threadHandle)); + + // Name the thread and add to the string table + // Users can override this at a later point with the Remotery thread name API + rmtGetThreadName(thread_id, thread_profiler->threadHandle, thread_profiler->threadName, sizeof(thread_profiler->threadName)); + name_length = strnlen_s(thread_profiler->threadName, 64); + thread_profiler->threadNameHash = _rmt_HashString32(thread_profiler->threadName, name_length, 0); + QueueAddToStringTable(mq_to_rmt, thread_profiler->threadNameHash, thread_profiler->threadName, name_length, thread_profiler); + + // Create the CPU sample tree only. The rest are created on-demand as they need extra context to function correctly. 
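+    // (The GPU sample trees, for example, can only be constructed once the user has bound the
+    // relevant graphics device/context to Remotery, which happens well after thread creation.)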
+ rmtTryNew(SampleTree, thread_profiler->sampleTrees[RMT_SampleType_CPU], sizeof(Sample), (ObjConstructor)Sample_Constructor, + (ObjDestructor)Sample_Destructor); + +#if RMT_USE_D3D11 + rmtTry(D3D11_Create(&thread_profiler->d3d11)); +#endif + +#if RMT_USE_D3D12 + rmtTry(D3D12ThreadData_Create(&thread_profiler->d3d12ThreadData)); +#endif + +#if RMT_USE_VULKAN + rmtTry(VulkanThreadData_Create(&thread_profiler->vulkanThreadData)); +#endif + + return RMT_ERROR_NONE; +} + +static void ThreadProfiler_Destructor(ThreadProfiler* thread_profiler) +{ + rmtU32 index; + +#if RMT_USE_VULKAN + rmtDelete(VulkanThreadData, thread_profiler->vulkanThreadData); +#endif + +#if RMT_USE_D3D12 + rmtDelete(D3D12ThreadData, thread_profiler->d3d12ThreadData); +#endif + +#if RMT_USE_D3D11 + rmtDelete(D3D11, thread_profiler->d3d11); +#endif + + for (index = 0; index < RMT_SampleType_Count; index++) + { + rmtDelete(SampleTree, thread_profiler->sampleTrees[index]); + } + + rmtCloseThreadHandle(thread_profiler->threadHandle); +} + +static rmtError ThreadProfiler_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample) +{ + rmtError error; + ModifySampleTree(tree, + error = SampleTree_Push(tree, name_hash, flags, sample); + ); + return error; +} + +static void CloseOpenSamples(Sample* sample, rmtU64 sample_time_us, rmtU32 parents_are_last) +{ + Sample* child_sample; + + // Depth-first search into children as we want to close child samples before their parents + for (child_sample = sample->first_child; child_sample != NULL; child_sample = child_sample->next_sibling) + { + rmtU32 is_last = parents_are_last & (child_sample == sample->last_child ? 1 : 0); + CloseOpenSamples(child_sample, sample_time_us, is_last); + } + + // A chain of open samples will be linked from the root to the deepest, currently open sample + if (parents_are_last > 0) + { + Sample_Close(sample, sample_time_us); + } +} + +static rmtError MakePartialTreeCopy(SampleTree* sample_tree, rmtU64 sample_time_us, SampleTree* out_sample_tree_copy) +{ + rmtU32 sample_time_s = (rmtU32)(sample_time_us / 1000); + StoreRelease(&sample_tree->msLastTreeSendTime, sample_time_s); + + // Make a local copy of the tree as we want to keep the current tree for active profiling + rmtTry(SampleTree_Copy(out_sample_tree_copy, sample_tree)); + + // Close all samples from the deepest open sample, right back to the root + CloseOpenSamples(out_sample_tree_copy->root, sample_time_us, 1); + + return RMT_ERROR_NONE; +} + +static rmtBool ThreadProfiler_Pop(ThreadProfiler* thread_profiler, rmtMessageQueue* queue, Sample* sample, rmtU32 msg_user_data) +{ + SampleTree* tree = thread_profiler->sampleTrees[sample->type]; + SampleTree_Pop(tree, sample); + + // Are we back at the root? 
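+    // The root is the dummy sample created in SampleTree_Constructor for the lifetime of the
+    // thread, so being back at it means the outermost user sample has just been closed.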
+ if (tree->currentParent == tree->root) + { + Sample* root; + + // Disconnect all samples from the root and pack in the chosen message queue + ModifySampleTree(tree, + root = tree->root; + root->first_child = NULL; + root->last_child = NULL; + root->nb_children = 0; + ); + QueueSampleTree(queue, sample, tree->allocator, thread_profiler->threadName, msg_user_data, thread_profiler, RMT_FALSE); + + // Update the last send time for this tree, for stall detection + StoreRelease(&tree->msLastTreeSendTime, (rmtU32)(sample->us_end / 1000)); + + return RMT_TRUE; + } + + if (tree->sendSampleOnClose == sample) + { + // Copy the sample tree as it is and send as a partial tree + SampleTree partial_tree; + if (MakePartialTreeCopy(tree, sample->us_start + sample->us_length, &partial_tree) == RMT_ERROR_NONE) + { + Sample* root_sample = partial_tree.root->first_child; + assert(root_sample != NULL); + QueueSampleTree(queue, root_sample, partial_tree.allocator, thread_profiler->threadName, msg_user_data, thread_profiler, RMT_TRUE); + } + + // Tree has been copied away to the message queue so free up the samples + if (partial_tree.root != NULL) + { + FreeSamples(partial_tree.root, partial_tree.allocator); + } + + tree->sendSampleOnClose = NULL; + } + + return RMT_FALSE; +} + +static rmtU32 ThreadProfiler_GetNameHash(ThreadProfiler* thread_profiler, rmtMessageQueue* queue, rmtPStr name, rmtU32* hash_cache) +{ + size_t name_len; + rmtU32 name_hash; + + // Hash cache provided? + if (hash_cache != NULL) + { + // Calculate the hash first time round only + name_hash = AtomicLoadU32((rmtAtomicU32*)hash_cache); + if (name_hash == 0) + { + assert(name != NULL); + name_len = strnlen_s(name, 256); + name_hash = _rmt_HashString32(name, name_len, 0); + + // Queue the string for the string table and only cache the hash if it succeeds + if (QueueAddToStringTable(queue, name_hash, name, name_len, thread_profiler) == RMT_TRUE) + { + AtomicStoreU32((rmtAtomicU32*)hash_cache, name_hash); + } + } + + return name_hash; + } + + // Have to recalculate and speculatively insert the name every time when no cache storage exists + name_len = strnlen_s(name, 256); + name_hash = _rmt_HashString32(name, name_len, 0); + QueueAddToStringTable(queue, name_hash, name, name_len, thread_profiler); + return name_hash; +} + +typedef struct ThreadProfilers +{ + // Timer shared with Remotery threads + usTimer* timer; + + // Queue between clients and main remotery thread + rmtMessageQueue* mqToRmtThread; + + // On x64 machines this points to the sample function + void* compiledSampleFn; + rmtU32 compiledSampleFnSize; + + // Used to store thread profilers bound to an OS thread + rmtTLS threadProfilerTlsHandle; + + // Array of preallocated ThreadProfiler objects + // Read iteration is safe given that no incomplete ThreadProfiler objects will be encountered during iteration. + // The ThreadProfiler count is only incremented once a new ThreadProfiler is fully defined and ready to be used. + // Do not use this list to verify if a ThreadProfiler exists for a given thread. Use the mutex-guarded Get functions instead. 
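+    // Note the fixed capacity: at most 256 profiled threads per process, after which
+    // ThreadProfilers_GetThreadProfiler below reports RMT_ERROR_MALLOC_FAIL.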
+ ThreadProfiler threadProfilers[256]; + rmtAtomicU32 nbThreadProfilers; + rmtU32 maxNbThreadProfilers; + + // Guards creation and existence-testing of the ThreadProfiler list + rmtMutex threadProfilerMutex; + + // Periodic thread sampling thread + rmtThread* threadSampleThread; + + // Periodic thread to processor gatherer + rmtThread* threadGatherThread; +} ThreadProfilers; + +static rmtError SampleThreadsLoop(rmtThread* rmt_thread); + +#ifdef RMT_PLATFORM_WINDOWS +#ifdef RMT_ARCH_64BIT +static void* CreateSampleCallback(rmtU32* out_size); +#endif +#endif + +static rmtError ThreadProfilers_Constructor(ThreadProfilers* thread_profilers, usTimer* timer, rmtMessageQueue* mq_to_rmt_thread) +{ + // Set to default + thread_profilers->timer = timer; + thread_profilers->mqToRmtThread = mq_to_rmt_thread; + thread_profilers->compiledSampleFn = NULL; + thread_profilers->compiledSampleFnSize = 0; + thread_profilers->threadProfilerTlsHandle = TLS_INVALID_HANDLE; + thread_profilers->nbThreadProfilers = 0; + thread_profilers->maxNbThreadProfilers = sizeof(thread_profilers->threadProfilers) / sizeof(thread_profilers->threadProfilers[0]); + mtxInit(&thread_profilers->threadProfilerMutex); + thread_profilers->threadSampleThread = NULL; + thread_profilers->threadGatherThread = NULL; + +#ifdef RMT_PLATFORM_WINDOWS +#ifdef RMT_ARCH_64BIT + thread_profilers->compiledSampleFn = CreateSampleCallback(&thread_profilers->compiledSampleFnSize); + if (thread_profilers->compiledSampleFn == NULL) + { + return RMT_ERROR_MALLOC_FAIL; + } +#endif +#endif + + // Allocate a TLS handle for the thread profilers + rmtTry(tlsAlloc(&thread_profilers->threadProfilerTlsHandle)); + + // Kick-off the thread sampler + if (g_Settings.enableThreadSampler == RMT_TRUE) + { + rmtTryNew(rmtThread, thread_profilers->threadSampleThread, SampleThreadsLoop, thread_profilers); + } + + return RMT_ERROR_NONE; +} + +static void ThreadProfilers_Destructor(ThreadProfilers* thread_profilers) +{ + rmtU32 thread_index; + + rmtDelete(rmtThread, thread_profilers->threadSampleThread); + + // Delete all profilers + for (thread_index = 0; thread_index < thread_profilers->nbThreadProfilers; thread_index++) + { + ThreadProfiler* thread_profiler = thread_profilers->threadProfilers + thread_index; + ThreadProfiler_Destructor(thread_profiler); + } + + if (thread_profilers->threadProfilerTlsHandle != TLS_INVALID_HANDLE) + { + tlsFree(thread_profilers->threadProfilerTlsHandle); + } + +#ifdef RMT_PLATFORM_WINDOWS +#ifdef RMT_ARCH_64BIT + if (thread_profilers->compiledSampleFn != NULL) + { + VirtualFree(thread_profilers->compiledSampleFn, 0, MEM_RELEASE); + } +#endif +#endif + + mtxDelete(&thread_profilers->threadProfilerMutex); +} + +static rmtError ThreadProfilers_GetThreadProfiler(ThreadProfilers* thread_profilers, rmtThreadId thread_id, ThreadProfiler** out_thread_profiler) +{ + rmtU32 profiler_index; + ThreadProfiler* thread_profiler; + rmtError error; + + mtxLock(&thread_profilers->threadProfilerMutex); + + // Linear search for a matching thread id + for (profiler_index = 0; profiler_index < thread_profilers->nbThreadProfilers; profiler_index++) + { + thread_profiler = thread_profilers->threadProfilers + profiler_index; + if (thread_profiler->threadId == thread_id) + { + *out_thread_profiler = thread_profiler; + mtxUnlock(&thread_profilers->threadProfilerMutex); + return RMT_ERROR_NONE; + } + } + + if (thread_profilers->nbThreadProfilers+1 > thread_profilers->maxNbThreadProfilers) + { + mtxUnlock(&thread_profilers->threadProfilerMutex); + return 
RMT_ERROR_MALLOC_FAIL;
+    }
+
+    // Thread info not found so create a new one at the end
+    thread_profiler = thread_profilers->threadProfilers + thread_profilers->nbThreadProfilers;
+    error = ThreadProfiler_Constructor(thread_profilers->mqToRmtThread, thread_profiler, thread_id);
+    if (error != RMT_ERROR_NONE)
+    {
+        ThreadProfiler_Destructor(thread_profiler);
+        mtxUnlock(&thread_profilers->threadProfilerMutex);
+        return error;
+    }
+    *out_thread_profiler = thread_profiler;
+
+    // Increment count for consume by read iterators
+    // Within the mutex so that there are no race conditions creating thread profilers
+    // Using release semantics to ensure a memory barrier for read iterators
+    StoreRelease(&thread_profilers->nbThreadProfilers, thread_profilers->nbThreadProfilers + 1);
+
+    mtxUnlock(&thread_profilers->threadProfilerMutex);
+
+    return RMT_ERROR_NONE;
+}
+
+static rmtError ThreadProfilers_GetCurrentThreadProfiler(ThreadProfilers* thread_profilers, ThreadProfiler** out_thread_profiler)
+{
+    // Is there a thread profiler associated with this thread yet?
+    *out_thread_profiler = (ThreadProfiler*)tlsGet(thread_profilers->threadProfilerTlsHandle);
+    if (*out_thread_profiler == NULL)
+    {
+        // Allocate on-demand
+        rmtTry(ThreadProfilers_GetThreadProfiler(thread_profilers, rmtGetCurrentThreadId(), out_thread_profiler));
+
+        // Bind to the current thread
+        tlsSet(thread_profilers->threadProfilerTlsHandle, *out_thread_profiler);
+    }
+
+    return RMT_ERROR_NONE;
+}
+
+static rmtBool ThreadProfilers_ThreadInCallback(ThreadProfilers* thread_profilers, rmtCpuContext* context)
+{
+#ifdef RMT_PLATFORM_WINDOWS
+#ifdef RMT_ARCH_32BIT
+    if (context->Eip >= (DWORD)thread_profilers->compiledSampleFn &&
+        context->Eip < (DWORD)((char*)thread_profilers->compiledSampleFn + thread_profilers->compiledSampleFnSize))
+    {
+        return RMT_TRUE;
+    }
+#else
+    if (context->Rip >= (DWORD64)thread_profilers->compiledSampleFn &&
+        context->Rip < (DWORD64)((char*)thread_profilers->compiledSampleFn + thread_profilers->compiledSampleFnSize))
+    {
+        return RMT_TRUE;
+    }
+#endif
+#endif
+    return RMT_FALSE;
+}
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+   @TGATHER: Thread Gatherer, periodically polling for newly created threads
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+static void GatherThreads(ThreadProfilers* thread_profilers)
+{
+    rmtThreadHandle handle;
+
+    assert(thread_profilers != NULL);
+
+#ifdef RMT_ENABLE_THREAD_SAMPLER
+
+    // Create the snapshot - this is a slow call
+    handle = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
+    if (handle != INVALID_HANDLE_VALUE)
+    {
+        BOOL success;
+
+        THREADENTRY32 thread_entry;
+        thread_entry.dwSize = sizeof(thread_entry);
+
+        // Loop through all threads owned by this process
+        success = Thread32First(handle, &thread_entry);
+        while (success == TRUE)
+        {
+            if (thread_entry.th32OwnerProcessID == GetCurrentProcessId())
+            {
+                // Create thread profilers on-demand if they're not already there
+                ThreadProfiler* thread_profiler;
+                rmtError error = ThreadProfilers_GetThreadProfiler(thread_profilers, thread_entry.th32ThreadID, &thread_profiler);
+                if (error != RMT_ERROR_NONE)
+                {
+                    // Not
really worth bringing the whole profiler down here + rmt_LogText("REMOTERY ERROR: Failed to create Thread Profiler"); + } + } + + success = Thread32Next(handle, &thread_entry); + } + + CloseHandle(handle); + } + +#endif +} + +static rmtError GatherThreadsLoop(rmtThread* thread) +{ + ThreadProfilers* thread_profilers = (ThreadProfilers*)thread->param; + rmtU32 sleep_time = 100; + + assert(thread_profilers != NULL); + + rmt_SetCurrentThreadName("RemoteryGatherThreads"); + + while (thread->request_exit == RMT_FALSE) + { + // We want a long period of time between scanning for new threads as the process is a little expensive (~30ms here). + // However not too long so as to miss potentially detailed process startup data. + // Use reduced sleep time at startup to catch as many early thread creations as possible. + // TODO(don): We could get processes to register themselves to ensure no startup data is lost but the scan must still + // be present, to catch threads in a process that the user doesn't create (e.g. graphics driver threads). + GatherThreads(thread_profilers); + msSleep(sleep_time); + sleep_time = minU32(sleep_time * 2, 2000); + } + + return RMT_ERROR_NONE; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @TSAMPLER: Sampling thread contexts +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +typedef struct Processor +{ + // Current thread profiler sampling this processor + ThreadProfiler* threadProfiler; + + rmtU32 sampleCount; + rmtU64 sampleTime; +} Processor; + +typedef struct Msg_ProcessorThreads +{ + // Running index of processor messages + rmtU64 messageIndex; + + // Processor array, leaking into the memory behind the struct + rmtU32 nbProcessors; + Processor processors[1]; +} Msg_ProcessorThreads; + +static void QueueProcessorThreads(rmtMessageQueue* queue, rmtU64 message_index, rmtU32 nb_processors, Processor* processors) +{ + Msg_ProcessorThreads* payload; + + // Attempt to allocate a message for sending processors to the viewer + rmtU32 array_size = (nb_processors - 1) * sizeof(Processor); + Message* message = rmtMessageQueue_AllocMessage(queue, sizeof(Msg_ProcessorThreads) + array_size, NULL); + if (message == NULL) + { + return; + } + + // Populate and commit + payload = (Msg_ProcessorThreads*)message->payload; + payload->messageIndex = message_index; + payload->nbProcessors = nb_processors; + memcpy(payload->processors, processors, nb_processors * sizeof(Processor)); + rmtMessageQueue_CommitMessage(message, MsgID_ProcessorThreads); +} + +#ifdef RMT_PLATFORM_WINDOWS +#if defined(RMT_ARCH_32BIT) +__declspec(naked) static void SampleCallback() +{ + // + // It's important to realise that this call can be pre-empted by the scheduler and shifted to another processor *while we are + // sampling which processor this thread is on*. + // + // This has two very important implications: + // + // * What we are sampling here is an *approximation* of the path of threads across processors. + // * These samples can't be used to "open" and "close" sample periods on a processor as it's highly likely you'll get many + // open events without a close, or vice versa. 
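+    // (For example, a thread seen on processor 2 in two consecutive samples may still have been
+    // scheduled out and back, or visited other processors entirely, in between the samples.)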
+    //
+    // As such, we can only choose a sampling period and for each sample register which threads are on which processor.
+    //
+    // This is very different to hooking up the Event Tracing API (requiring Administrator elevation), which raises events for
+    // each context switch, directly from the kernel.
+    //
+
+    __asm
+    {
+        // Push the EIP return address used by the final ret instruction
+        push ebx
+
+        // We might be in the middle of something like a cmp/jmp instruction pair so preserve EFLAGS
+        // (Classic example which seems to pop up regularly is _RTC_CheckESP, with cmp/call/jne)
+        pushfd
+
+        // Push all volatile registers as we don't know what the function calls below will destroy
+        push eax
+        push ecx
+        push edx
+
+        // Retrieve and store the current processor index
+        call esi
+        mov [edi].processorIndex, eax
+
+        // Mark as ready for scheduling another callback
+        // Intel x86 store release
+        mov [edi].nbSamplesWithoutCallback, 0
+
+        // Restore preserved register state
+        pop edx
+        pop ecx
+        pop eax
+
+        // Restore registers used to provide parameters to the callback
+        mov ebx, dword ptr [edi].registerBackup0
+        mov esi, dword ptr [edi].registerBackup1
+        mov edi, dword ptr [edi].registerBackup2
+
+        // Restore EFLAGS
+        popfd
+
+        // Pops the original EIP off the stack and jmps to the original suspend point in the thread
+        ret
+    }
+}
+#elif defined(RMT_ARCH_64BIT)
+// Generated with https://defuse.ca/online-x86-assembler.htm
+static rmtU8 SampleCallbackBytes[] =
+{
+    // Push the RIP return address used by the final ret instruction
+    0x53,                                       // push rbx
+
+    // We might be in the middle of something like a cmp/jmp instruction pair so preserve RFLAGS
+    // (Classic example which seems to pop up regularly is _RTC_CheckESP, with cmp/call/jne)
+    0x9C,                                       // pushfq
+
+    // Push all volatile registers as we don't know what the function calls below will destroy
+    0x50,                                       // push rax
+    0x51,                                       // push rcx
+    0x52,                                       // push rdx
+    0x41, 0x50,                                 // push r8
+    0x41, 0x51,                                 // push r9
+    0x41, 0x52,                                 // push r10
+    0x41, 0x53,                                 // push r11
+
+    // Retrieve and store the current processor index
+    0xFF, 0xD6,                                 // call rsi
+    0x89, 0x47, 0x1C,                           // mov dword ptr [rdi + 28], eax
+
+    // Mark as ready for scheduling another callback
+    // Intel x64 store release
+    0xC7, 0x47, 0x18, 0x00, 0x00, 0x00, 0x00,   // mov dword ptr [rdi + 24], 0
+
+    // Restore preserved register state
+    0x41, 0x5B,                                 // pop r11
+    0x41, 0x5A,                                 // pop r10
+    0x41, 0x59,                                 // pop r9
+    0x41, 0x58,                                 // pop r8
+    0x5A,                                       // pop rdx
+    0x59,                                       // pop rcx
+    0x58,                                       // pop rax
+
+    // Restore registers used to provide parameters to the callback
+    0x48, 0x8B, 0x1F,                           // mov rbx, qword ptr [rdi + 0]
+    0x48, 0x8B, 0x77, 0x08,                     // mov rsi, qword ptr [rdi + 8]
+    0x48, 0x8B, 0x7F, 0x10,                     // mov rdi, qword ptr [rdi + 16]
+
+    // Restore RFLAGS
+    0x9D,                                       // popfq
+
+    // Pops the original RIP off the stack and jmps to the original suspend point in the thread
+    0xC3                                        // ret
+};
+static void* CreateSampleCallback(rmtU32* out_size)
+{
+    // Allocate page for the generated code
+    DWORD size = 4096;
+    DWORD old_protect;
+    void* function = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+    if (function == NULL)
+    {
+        return NULL;
+    }
+
+    // Clear whole allocation to int 3h
+    memset(function, 0xCC, size);
+
+    // Copy over the generated code
+    memcpy(function, SampleCallbackBytes, sizeof(SampleCallbackBytes));
+    *out_size = sizeof(SampleCallbackBytes);
+
+    // Enable execution
+    VirtualProtect(function, size, PAGE_EXECUTE_READ, &old_protect);
+    return function;
+}
+#endif
+#endif
+
+#if defined(__cplusplus) && 
+#if defined(__cplusplus) && __cplusplus >= 201103L
+static_assert(offsetof(ThreadProfiler, nbSamplesWithoutCallback) == 24, "");
+static_assert(offsetof(ThreadProfiler, processorIndex) == 28, "");
+#endif
+
+static rmtError CheckForStallingSamples(SampleTree* stalling_sample_tree, ThreadProfiler* thread_profiler, rmtU64 sample_time_us)
+{
+    SampleTree* sample_tree;
+    rmtU32 sample_time_ms = (rmtU32)(sample_time_us / 1000);
+
+    // Initialise to empty
+    stalling_sample_tree->root = NULL;
+    stalling_sample_tree->allocator = NULL;
+
+    sample_tree = thread_profiler->sampleTrees[RMT_SampleType_CPU];
+    if (sample_tree != NULL)
+    {
+        Sample* root_sample;
+
+        // Skip the stall check if the tree is being modified
+        if (LoadAcquire(&sample_tree->treeBeingModified) != 0)
+        {
+            return RMT_ERROR_NONE;
+        }
+
+        // The root is a dummy root inserted on tree creation so check that for children
+        root_sample = sample_tree->root;
+        if (root_sample != NULL && root_sample->nb_children > 0)
+        {
+            // Consider the tree stalled when it hasn't been sent for over a second
+            if (sample_time_ms - LoadAcquire(&sample_tree->msLastTreeSendTime) > 1000)
+            {
+                rmtTry(MakePartialTreeCopy(sample_tree, sample_time_us, stalling_sample_tree));
+            }
+        }
+    }
+
+    return RMT_ERROR_NONE;
+}
+
+static rmtError InitThreadSampling(ThreadProfilers* thread_profilers)
+{
+    rmt_SetCurrentThreadName("RemoterySampleThreads");
+
+    // Make an initial gather so that we have something to work with
+    GatherThreads(thread_profilers);
+
+#ifdef RMT_ENABLE_THREAD_SAMPLER
+    // Ensure we can wake up every millisecond
+    if (timeBeginPeriod(1) != TIMERR_NOERROR)
+    {
+        return RMT_ERROR_UNKNOWN;
+    }
+#endif
+
+    // Kick off the background thread that watches for new threads
+    rmtTryNew(rmtThread, thread_profilers->threadGatherThread, GatherThreadsLoop, thread_profilers);
+
+    // We're going to be shuffling thread visits to avoid the scheduler trying to predict a work-load based on sampling
+    // Use the global RNG with a random seed to start the shuffle
+    Well512_Init((rmtU32)time(NULL));
+
+    return RMT_ERROR_NONE;
+}
+
+static rmtError SampleThreadsLoop(rmtThread* rmt_thread)
+{
+    rmtCpuContext context;
+    rmtU32 processor_message_index = 0;
+    rmtU32 nb_processors;
+    Processor* processors;
+    rmtU32 processor_index;
+
+    ThreadProfilers* thread_profilers = (ThreadProfilers*)rmt_thread->param;
+
+    // If we can't figure out how many processors there are then we are running on an unsupported platform
+    nb_processors = rmtGetNbProcessors();
+    if (nb_processors == 0)
+    {
+        return RMT_ERROR_UNKNOWN;
+    }
+
+    rmtTry(InitThreadSampling(thread_profilers));
+
+    // An array entry for each processor
+    rmtTryMallocArray(Processor, processors, nb_processors);
+    for (processor_index = 0; processor_index < nb_processors; processor_index++)
+    {
+        processors[processor_index].threadProfiler = NULL;
+        processors[processor_index].sampleTime = 0;
+    }
+
+    while (rmt_thread->request_exit == RMT_FALSE)
+    {
+        rmtU32 lfsr_seed;
+        rmtU32 lfsr_value;
+
+        // Query how many threads the gather knows about this time round
+        rmtU32 nb_thread_profilers = LoadAcquire(&thread_profilers->nbThreadProfilers);
+
+        // Calculate table size log2 required to fit count entries. Normally we would adjust the log2 input by -1 so that
+        // power-of-2 counts map to their exact bit offset and don't require a twice larger table. You can iterate indices
+        // 0 to (1<<table_size_log2)-1 here only because the table is kept a power-of-2 larger than the thread count: the
+        // LFSR below visits 1 to (1<<table_size_log2)-1 and never zero, and indices past the thread count are skipped.
+        rmtU32 table_size_log2 = Log2i(nb_thread_profilers) + 1;
+
+        // Reseed with a non-zero random value each pass so that threads are visited in a different, unpredictable order
+        lfsr_seed = Well512_Random() & ((1 << table_size_log2) - 1);
+        if (lfsr_seed == 0)
+        {
+            lfsr_seed = 1;
+        }
+        lfsr_value = lfsr_seed;
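+        // For example (one possible tap choice): with 5 threads, table_size_log2 is 3 and a maximal 3-bit Galois LFSR
+        // with xor mask 0b110, seeded with 3, steps through the values 7, 5, 4, 2, 1, 6, 3, giving the shuffled thread
+        // indices 6, 4, 3, 1, 0, 5, 2, of which 5 and 6 fall outside the thread count and are skipped.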
+        do
+        {
+            rmtU32 thread_index;
+            rmtThreadId thread_id;
+            ThreadProfiler* thread_profiler;
+            rmtThreadHandle thread_handle;
+            rmtU64 sample_time_us;
+            rmtS32 sample_count;
+            SampleTree stalling_sample_tree;
+
+            // Step the LFSR and map its 1-based output onto a 0-based thread index, skipping out-of-range values
+            lfsr_value = GaloisLFSRNext(lfsr_value, GaloisLFSRMask(table_size_log2));
+            thread_index = lfsr_value - 1;
+            if (thread_index >= nb_thread_profilers)
+            {
+                continue;
+            }
+
+            // Ignore our own thread
+            thread_id = rmtGetCurrentThreadId();
+            thread_profiler = thread_profilers->threadProfilers + thread_index;
+            if (thread_profiler->threadId == thread_id)
+            {
+                continue;
+            }
+
+            // Suspend the thread so we can insert a callback
+            thread_handle = thread_profiler->threadHandle;
+            if (rmtSuspendThread(thread_handle) == RMT_FALSE)
+            {
+                continue;
+            }
+
+            // Mark the processor this thread was last recorded as running on.
+            // Note that a thread might be pre-empted multiple times in-between sampling. Given a sampling rate equal to the
+            // scheduling quantum, this doesn't happen too often. However in such cases, whoever marks the processor last is
+            // the one that gets recorded.
+            sample_time_us = usTimer_Get(thread_profilers->timer);
+            sample_count = AtomicAddS32(&thread_profiler->nbSamplesWithoutCallback, 1);
+            processor_index = thread_profiler->processorIndex;
+            if (processor_index != (rmtU32)-1)
+            {
+                assert(processor_index < nb_processors);
+                processors[processor_index].threadProfiler = thread_profiler;
+                processors[processor_index].sampleCount = sample_count;
+                processors[processor_index].sampleTime = sample_time_us;
+            }
+
+            // Swap in a new context with our callback if one is not already scheduled on this thread
+            if (sample_count == 0)
+            {
+                if (rmtGetUserModeThreadContext(thread_handle, &context) == RMT_TRUE &&
+                    // There is a slight window of opportunity, after which the callback sets nbSamplesWithoutCallback=0,
+                    // for this loop to suspend a thread while it's executing the last instructions of the callback.
+                    ThreadProfilers_ThreadInCallback(thread_profilers, &context) == RMT_FALSE)
+                {
+                    #ifdef RMT_PLATFORM_WINDOWS
+                        #ifdef RMT_ARCH_64BIT
+                            thread_profiler->registerBackup0 = context.Rbx;
+                            thread_profiler->registerBackup1 = context.Rsi;
+                            thread_profiler->registerBackup2 = context.Rdi;
+                            context.Rbx = context.Rip;
+                            context.Rsi = (rmtU64)GetCurrentProcessorNumber;
+                            context.Rdi = (rmtU64)thread_profiler;
+                            context.Rip = (DWORD64)thread_profilers->compiledSampleFn;
+                        #endif
+                        #ifdef RMT_ARCH_32BIT
+                            thread_profiler->registerBackup0 = context.Ebx;
+                            thread_profiler->registerBackup1 = context.Esi;
+                            thread_profiler->registerBackup2 = context.Edi;
+                            context.Ebx = context.Eip;
+                            context.Esi = (rmtU32)GetCurrentProcessorNumber;
+                            context.Edi = (rmtU32)thread_profiler;
+                            context.Eip = (DWORD)&SampleCallback;
+                        #endif
+                    #endif
+
+                    rmtSetThreadContext(thread_handle, &context);
+                }
+                else
+                {
+                    AtomicAddS32(&thread_profiler->nbSamplesWithoutCallback, -1);
+                }
+            }
+
+            // While the thread is suspended take the chance to check for sample trees that may never complete.
+            // Because SuspendThread on Windows is an async request, this needs to be placed at a point where the request completes.
+            // Calling GetThreadContext will ensure the request is completed so this stall check is placed after that.
+            if (RMT_ERROR_NONE != CheckForStallingSamples(&stalling_sample_tree, thread_profiler, sample_time_us))
+            {
+                assert(stalling_sample_tree.allocator != NULL);
+                if (stalling_sample_tree.root != NULL)
+                {
+                    FreeSamples(stalling_sample_tree.root, stalling_sample_tree.allocator);
+                }
+            }
+
+            rmtResumeThread(thread_handle);
+
+            if (stalling_sample_tree.root != NULL)
+            {
+                // If there is a stalling sample tree on this thread then send it to listeners.
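+                // The tree is a partial copy that CheckForStallingSamples made while the target thread was suspended,
+                // so this loop owns it outright until it is queued.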
+                // Do the send *outside* of all Suspend/Resume calls as we have no way of knowing who is reading/writing the queue.
+                // Mark this as partial so that the listeners know it will be overwritten.
+                Sample* sample = stalling_sample_tree.root->first_child;
+                assert(sample != NULL);
+                QueueSampleTree(thread_profilers->mqToRmtThread, sample, stalling_sample_tree.allocator, thread_profiler->threadName, 0, thread_profiler, RMT_TRUE);
+
+                // stalling_sample_tree.root->first_child has now been sent to the main Remotery thread and will be
+                // released once that thread has processed it. That leaves stalling_sample_tree.root to be freed here;
+                // before freeing the root sample we have to detach its children.
+                stalling_sample_tree.root->first_child = NULL;
+                stalling_sample_tree.root->last_child = NULL;
+                stalling_sample_tree.root->nb_children = 0;
+                assert(stalling_sample_tree.allocator != NULL);
+                FreeSamples(stalling_sample_tree.root, stalling_sample_tree.allocator);
+            }
+
+        } while (lfsr_value != lfsr_seed);
+
+        // Filter all processor samples made in this pass
+        for (processor_index = 0; processor_index < nb_processors; processor_index++)
+        {
+            Processor* processor = processors + processor_index;
+            ThreadProfiler* thread_profiler = processor->threadProfiler;
+
+            if (thread_profiler != NULL)
+            {
+                // If this thread was on another processor on a previous pass and that processor is still tracking it,
+                // remove the thread from that processor.
+                rmtU32 last_processor_index = thread_profiler->lastProcessorIndex;
+                if (last_processor_index != (rmtU32)-1 && last_processor_index != processor_index)
+                {
+                    assert(last_processor_index < nb_processors);
+                    if (processors[last_processor_index].threadProfiler == thread_profiler)
+                    {
+                        processors[last_processor_index].threadProfiler = NULL;
+                    }
+                }
+
+                // When the thread is still on the same processor, check to see if it has failed to trigger the callback
+                // within the last pass. This suggests the thread has gone to sleep and is no longer assigned to any
+                // processor.
+ else if (processor->sampleCount > 1) + { + processor->threadProfiler = NULL; + } + + thread_profiler->lastProcessorIndex = thread_profiler->processorIndex; + } + } + + // Send current processor state off to remotery + QueueProcessorThreads(thread_profilers->mqToRmtThread, processor_message_index++, nb_processors, processors); + } + + rmtDelete(rmtThread, thread_profilers->threadGatherThread); + +#ifdef RMT_ENABLE_THREAD_SAMPLER + timeEndPeriod(1); +#endif + + rmtFree(processors); + + return RMT_ERROR_NONE; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @REMOTERY: Remotery +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#if RMT_USE_OPENGL +typedef struct OpenGL_t OpenGL; +static rmtError OpenGL_Create(OpenGL** opengl); +static void OpenGL_Destructor(OpenGL* opengl); +#endif + +#if RMT_USE_METAL +typedef struct Metal_t Metal; +static rmtError Metal_Create(Metal** metal); +static void Metal_Destructor(Metal* metal); +#endif + +typedef struct PropertySnapshot +{ + // Inherit so that property states can be quickly allocated + ObjectLink Link; + + // Data copied from the property at the time of the snapshot + rmtPropertyType type; + rmtPropertyValue value; + rmtPropertyValue prevValue; + rmtU32 prevValueFrame; + rmtU32 nameHash; + rmtU32 uniqueID; + + // Depth calculated as part of the walk + rmtU8 depth; + + // Link to the next property snapshot + rmtU32 nbChildren; + struct PropertySnapshot* nextSnapshot; +} PropertySnapshot; + +typedef struct Msg_PropertySnapshot +{ + PropertySnapshot* rootSnapshot; + rmtU32 nbSnapshots; + rmtU32 propertyFrame; +} Msg_PropertySnapshot; + +static rmtError PropertySnapshot_Constructor(PropertySnapshot* snapshot) +{ + assert(snapshot != NULL); + + ObjectLink_Constructor((ObjectLink*)snapshot); + + snapshot->type = RMT_PropertyType_rmtBool; + snapshot->value.Bool = RMT_FALSE; + snapshot->nameHash = 0; + snapshot->uniqueID = 0; + snapshot->nbChildren = 0; + snapshot->depth = 0; + snapshot->nextSnapshot = NULL; + + return RMT_ERROR_NONE; +} + +static void PropertySnapshot_Destructor(PropertySnapshot* snapshot) +{ + RMT_UNREFERENCED_PARAMETER(snapshot); +} + +struct Remotery +{ + Server* server; + + // Microsecond accuracy timer for CPU timestamps + usTimer timer; + + // Queue between clients and main remotery thread + rmtMessageQueue* mq_to_rmt_thread; + + // The main server thread + rmtThread* thread; + + // String table shared by all threads + StringTable* string_table; + + // Open logfile handle to append events to + FILE* logfile; + + // Set to trigger a map of each message on the remotery thread message queue + void (*map_message_queue_fn)(Remotery* rmt, Message*); + void* map_message_queue_data; + +#if RMT_USE_CUDA + rmtCUDABind cuda; +#endif + +#if RMT_USE_OPENGL + OpenGL* opengl; +#endif + +#if RMT_USE_METAL + Metal* metal; +#endif + +#if RMT_USE_D3D12 + // Linked list of all D3D12 queue samplers + rmtMutex d3d12BindsMutex; + struct D3D12BindImpl* d3d12Binds; +#endif + +#if RMT_USE_VULKAN + // Linked list of all Vulkan queue samplers + rmtMutex vulkanBindsMutex; + struct VulkanBindImpl* vulkanBinds; +#endif + + ThreadProfilers* threadProfilers; + + // 
Root of all registered properties, guarded by mutex as property register can come from any thread + rmtMutex propertyMutex; + rmtProperty rootProperty; + + // Allocator for property values that get sent to the viewer + ObjectAllocator* propertyAllocator; + + // Frame used to determine age of property changes + rmtU32 propertyFrame; + + rmtAtomicS32 countThreads; +}; + +// +// Global remotery context +// +static Remotery* g_Remotery = NULL; + +// +// This flag marks the EXE/DLL that created the global remotery instance. We want to allow +// only the creating EXE/DLL to destroy the remotery instance. +// +static rmtBool g_RemoteryCreated = RMT_FALSE; + +static void rmtGetThreadNameFallback(char* out_thread_name, rmtU32 thread_name_size) +{ + // In cases where we can't get a thread name from the OS + out_thread_name[0] = 0; + strncat_s(out_thread_name, thread_name_size, "Thread", 6); + itoahex_s(out_thread_name + 6, thread_name_size - 6, AtomicAddS32(&g_Remotery->countThreads, 1)); +} + +static double saturate(double v) +{ + if (v < 0) + { + return 0; + } + if (v > 1) + { + return 1; + } + return v; +} + +static void PostProcessSamples(Sample* sample, rmtU32* nb_samples) +{ + Sample* child; + + assert(sample != NULL); + assert(nb_samples != NULL); + + (*nb_samples)++; + + { + // Hash integer line position to full hue + double h = (double)sample->name_hash / (double)0xFFFFFFFF; + double r = saturate(fabs(fmod(h * 6 + 0, 6) - 3) - 1); + double g = saturate(fabs(fmod(h * 6 + 4, 6) - 3) - 1); + double b = saturate(fabs(fmod(h * 6 + 2, 6) - 3) - 1); + + // Cubic smooth + r = r * r * (3 - 2 * r); + g = g * g * (3 - 2 * g); + b = b * b * (3 - 2 * b); + + // Lerp to HSV lightness a little + double k = 0.4; + r = r * k + (1 - k); + g = g * k + (1 - k); + b = b * k + (1 - k); + + // To RGB8 + sample->uniqueColour[0] = (rmtU8)maxS32(minS32((rmtS32)(r * 255), 255), 0); + sample->uniqueColour[1] = (rmtU8)maxS32(minS32((rmtS32)(g * 255), 255), 0); + sample->uniqueColour[2] = (rmtU8)maxS32(minS32((rmtS32)(b * 255), 255), 0); + + //rmtU32 hash = sample->name_hash; + //sample->uniqueColour[0] = 127 + ((hash & 255) >> 1); + //sample->uniqueColour[1] = 127 + (((hash >> 4) & 255) >> 1); + //sample->uniqueColour[2] = 127 + (((hash >> 8) & 255) >> 1); + } + + // Concatenate children + for (child = sample->first_child; child != NULL; child = child->next_sibling) + { + PostProcessSamples(child, nb_samples); + } +} + +static rmtError Remotery_SendLogTextMessage(Remotery* rmt, Message* message) +{ + Buffer* bin_buf; + rmtU32 write_start_offset; + + // Build the buffer as if it's being sent to the server + assert(rmt != NULL); + assert(message != NULL); + bin_buf = rmt->server->bin_buf; + WebSocket_PrepareBuffer(bin_buf); + rmtTry(bin_MessageHeader(bin_buf, "LOGM", &write_start_offset)); + rmtTry(Buffer_Write(bin_buf, message->payload, message->payload_size)); + rmtTry(bin_MessageFooter(bin_buf, write_start_offset)); + + // Pass to either the server or the log file + if (rmt->logfile != NULL) + { + rmtWriteFile(rmt->logfile, bin_buf->data + WEBSOCKET_MAX_FRAME_HEADER_SIZE, bin_buf->bytes_used - WEBSOCKET_MAX_FRAME_HEADER_SIZE); + } + if (Server_IsClientConnected(rmt->server) == RMT_TRUE) + { + rmtTry(Server_Send(rmt->server, bin_buf->data, bin_buf->bytes_used, 20)); + } + + return RMT_ERROR_NONE; +} + +static rmtError bin_SampleName(Buffer* buffer, const char* name, rmtU32 name_hash, rmtU32 name_length) +{ + rmtU32 write_start_offset; + rmtTry(bin_MessageHeader(buffer, "SSMP", &write_start_offset)); + 
rmtTry(Buffer_WriteU32(buffer, name_hash)); + rmtTry(Buffer_WriteU32(buffer, name_length)); + rmtTry(Buffer_Write(buffer, (void*)name, name_length)); + rmtTry(bin_MessageFooter(buffer, write_start_offset)); + + return RMT_ERROR_NONE; +} + +static rmtError Remotery_AddToStringTable(Remotery* rmt, Message* message) +{ + // Add to the string table + Msg_AddToStringTable* payload = (Msg_AddToStringTable*)message->payload; + const char* name = (const char*)(payload + 1); + rmtBool name_inserted = StringTable_Insert(rmt->string_table, payload->hash, name); + + // Emit to log file if one is open + if (name_inserted == RMT_TRUE && rmt->logfile != NULL) + { + Buffer* bin_buf = rmt->server->bin_buf; + bin_buf->bytes_used = 0; + rmtTry(bin_SampleName(bin_buf, name, payload->hash, payload->length)); + + rmtWriteFile(rmt->logfile, bin_buf->data, bin_buf->bytes_used); + } + + return RMT_ERROR_NONE; +} + +static rmtError bin_SampleTree(Buffer* buffer, Msg_SampleTree* msg) +{ + Sample* root_sample; + char thread_name[256]; + rmtU32 nb_samples = 0; + rmtU32 write_start_offset = 0; + + assert(buffer != NULL); + assert(msg != NULL); + + // Get the message root sample + root_sample = msg->rootSample; + assert(root_sample != NULL); + + // Add any sample types as a thread name post-fix to ensure they get their own viewer + thread_name[0] = 0; + strncat_s(thread_name, sizeof(thread_name), msg->threadName, strnlen_s(msg->threadName, 255)); + if (root_sample->type == RMT_SampleType_CUDA) + { + strncat_s(thread_name, sizeof(thread_name), " (CUDA)", 7); + } + if (root_sample->type == RMT_SampleType_D3D11) + { + strncat_s(thread_name, sizeof(thread_name), " (D3D11)", 8); + } + if (root_sample->type == RMT_SampleType_D3D12) + { + strncat_s(thread_name, sizeof(thread_name), " (D3D12)", 8); + } + if (root_sample->type == RMT_SampleType_OpenGL) + { + strncat_s(thread_name, sizeof(thread_name), " (OpenGL)", 9); + } + if (root_sample->type == RMT_SampleType_Metal) + { + strncat_s(thread_name, sizeof(thread_name), " (Metal)", 8); + } + if (root_sample->type == RMT_SampleType_Vulkan) + { + strncat_s(thread_name, sizeof(thread_name), " (Vulkan)", 9); + } + + // Get digest hash of samples so that viewer can efficiently rebuild its tables + PostProcessSamples(root_sample, &nb_samples); + + // Write sample message header + rmtTry(bin_MessageHeader(buffer, "SMPL", &write_start_offset)); + rmtTry(Buffer_WriteStringWithLength(buffer, thread_name)); + rmtTry(Buffer_WriteU32(buffer, nb_samples)); + rmtTry(Buffer_WriteU32(buffer, msg->partialTree ? 
1 : 0)); + + // Align serialised sample tree to 32-bit boundary + rmtTry(Buffer_AlignedPad(buffer, write_start_offset)); + + // Write entire sample tree + rmtTry(bin_Sample(buffer, root_sample, 0)); + + rmtTry(bin_MessageFooter(buffer, write_start_offset)); + + return RMT_ERROR_NONE; +} + +#if RMT_USE_CUDA +static rmtBool AreCUDASamplesReady(Sample* sample); +static rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample); +#endif + +static rmtError Remotery_SendToViewerAndLog(Remotery* rmt, Buffer* bin_buf, rmtU32 timeout) +{ + rmtError error = RMT_ERROR_NONE; + + if (Server_IsClientConnected(rmt->server) == RMT_TRUE) + { + rmt_BeginCPUSample(Server_Send, RMTSF_Aggregate); + error = Server_Send(rmt->server, bin_buf->data, bin_buf->bytes_used, timeout); + rmt_EndCPUSample(); + } + + if (rmt->logfile != NULL) + { + // Write the data after the websocket header + rmtWriteFile(rmt->logfile, bin_buf->data + WEBSOCKET_MAX_FRAME_HEADER_SIZE, bin_buf->bytes_used - WEBSOCKET_MAX_FRAME_HEADER_SIZE); + } + + return error; +} + +static rmtError Remotery_SendSampleTreeMessage(Remotery* rmt, Message* message) +{ + rmtError error = RMT_ERROR_NONE; + + Msg_SampleTree* sample_tree; + Sample* sample; + Buffer* bin_buf; + + assert(rmt != NULL); + assert(message != NULL); + + // Get the message root sample + sample_tree = (Msg_SampleTree*)message->payload; + sample = sample_tree->rootSample; + assert(sample != NULL); + +#if RMT_USE_CUDA + if (sample->type == RMT_SampleType_CUDA) + { + // If these CUDA samples aren't ready yet, stick them to the back of the queue and continue + rmtBool are_samples_ready; + rmt_BeginCPUSample(AreCUDASamplesReady, 0); + are_samples_ready = AreCUDASamplesReady(sample); + rmt_EndCPUSample(); + if (!are_samples_ready) + { + QueueSampleTree(rmt->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->threadName, 0, + message->threadProfiler, RMT_FALSE); + return RMT_ERROR_NONE; + } + + // Retrieve timing of all CUDA samples + rmt_BeginCPUSample(GetCUDASampleTimes, 0); + GetCUDASampleTimes(sample->parent, sample); + rmt_EndCPUSample(); + } +#endif + + // Reset the buffer for sending a websocket message + bin_buf = rmt->server->bin_buf; + WebSocket_PrepareBuffer(bin_buf); + + // Serialise the sample tree + rmt_BeginCPUSample(bin_SampleTree, RMTSF_Aggregate); + error = bin_SampleTree(bin_buf, sample_tree); + rmt_EndCPUSample(); + + if (g_Settings.sampletree_handler != NULL) + { + g_Settings.sampletree_handler(g_Settings.sampletree_context, sample_tree); + } + + // Release sample tree samples back to their allocator + FreeSamples(sample, sample_tree->allocator); + + if (error != RMT_ERROR_NONE) + { + return error; + } + + // Send to the viewer with a reasonably long timeout as the size of the sample data may be large + return Remotery_SendToViewerAndLog(rmt, bin_buf, 50000); +} + +static rmtError Remotery_SendProcessorThreads(Remotery* rmt, Message* message) +{ + rmtU32 processor_index; + + Msg_ProcessorThreads* processor_threads = (Msg_ProcessorThreads*)message->payload; + + Buffer* bin_buf; + rmtU32 write_start_offset; + + // Reset the buffer for sending a websocket message + bin_buf = rmt->server->bin_buf; + WebSocket_PrepareBuffer(bin_buf); + + // Serialise the message + rmtTry(bin_MessageHeader(bin_buf, "PRTH", &write_start_offset)); + rmtTry(Buffer_WriteU32(bin_buf, processor_threads->nbProcessors)); + rmtTry(Buffer_WriteU64(bin_buf, processor_threads->messageIndex)); + for (processor_index = 0; processor_index < processor_threads->nbProcessors; processor_index++) + 
{ + Processor* processor = processor_threads->processors + processor_index; + if (processor->threadProfiler != NULL) + { + rmtTry(Buffer_WriteU32(bin_buf, processor->threadProfiler->threadId)); + rmtTry(Buffer_WriteU32(bin_buf, processor->threadProfiler->threadNameHash)); + rmtTry(Buffer_WriteU64(bin_buf, processor->sampleTime)); + } + else + { + rmtTry(Buffer_WriteU32(bin_buf, (rmtU32)-1)); + rmtTry(Buffer_WriteU32(bin_buf, 0)); + rmtTry(Buffer_WriteU64(bin_buf, 0)); + } + } + + rmtTry(bin_MessageFooter(bin_buf, write_start_offset)); + + return Remotery_SendToViewerAndLog(rmt, bin_buf, 50); +} + +static void FreePropertySnapshots(PropertySnapshot* snapshot) +{ + // Allows root call to pass null + if (snapshot == NULL) + { + return; + } + + // Depth first free + if (snapshot->nextSnapshot != NULL) + { + FreePropertySnapshots(snapshot->nextSnapshot); + } + + ObjectAllocator_Free(g_Remotery->propertyAllocator, snapshot); +} + +static rmtError Remotery_SerialisePropertySnapshots(Buffer* bin_buf, Msg_PropertySnapshot* msg_snapshot) +{ + PropertySnapshot* snapshot; + rmtU8 empty_group[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + rmtU32 write_start_offset; + + // Header + rmtTry(bin_MessageHeader(bin_buf, "PSNP", &write_start_offset)); + rmtTry(Buffer_WriteU32(bin_buf, msg_snapshot->nbSnapshots)); + rmtTry(Buffer_WriteU32(bin_buf, msg_snapshot->propertyFrame)); + + // Linearised snapshots + for (snapshot = msg_snapshot->rootSnapshot; snapshot != NULL; snapshot = snapshot->nextSnapshot) + { + rmtU8 colour_depth[4] = {0, 0, 0}; + + // Same place as samples so that the GPU renderer can easily pick them out + rmtTry(Buffer_WriteU32(bin_buf, snapshot->nameHash)); + rmtTry(Buffer_WriteU32(bin_buf, snapshot->uniqueID)); + + // 3 byte place holder for viewer-side colour, with snapshot depth packed next to it + colour_depth[3] = snapshot->depth; + rmtTry(Buffer_Write(bin_buf, colour_depth, 4)); + + // Dispatch on property type, but maintaining 64-bits per value + rmtTry(Buffer_WriteU32(bin_buf, snapshot->type)); + switch (snapshot->type) + { + // Empty + case RMT_PropertyType_rmtGroup: + rmtTry(Buffer_Write(bin_buf, empty_group, 16)); + break; + + // All value ranges here are double-representable, so convert them early in C where it's cheap + case RMT_PropertyType_rmtBool: + rmtTry(Buffer_WriteF64(bin_buf, snapshot->value.Bool)); + rmtTry(Buffer_WriteF64(bin_buf, snapshot->prevValue.Bool)); + break; + case RMT_PropertyType_rmtS32: + rmtTry(Buffer_WriteF64(bin_buf, snapshot->value.S32)); + rmtTry(Buffer_WriteF64(bin_buf, snapshot->prevValue.S32)); + break; + case RMT_PropertyType_rmtU32: + rmtTry(Buffer_WriteF64(bin_buf, snapshot->value.U32)); + rmtTry(Buffer_WriteF64(bin_buf, snapshot->prevValue.U32)); + break; + case RMT_PropertyType_rmtF32: + rmtTry(Buffer_WriteF64(bin_buf, snapshot->value.F32)); + rmtTry(Buffer_WriteF64(bin_buf, snapshot->prevValue.F32)); + break; + + // The high end of these are not double representable but store their full pattern so we don't lose data + case RMT_PropertyType_rmtS64: + case RMT_PropertyType_rmtU64: + rmtTry(Buffer_WriteU64(bin_buf, snapshot->value.U64)); + rmtTry(Buffer_WriteU64(bin_buf, snapshot->prevValue.U64)); + break; + + case RMT_PropertyType_rmtF64: + rmtTry(Buffer_WriteF64(bin_buf, snapshot->value.F64)); + rmtTry(Buffer_WriteF64(bin_buf, snapshot->prevValue.F64)); + break; + } + + rmtTry(Buffer_WriteU32(bin_buf, snapshot->prevValueFrame)); + rmtTry(Buffer_WriteU32(bin_buf, snapshot->nbChildren)); + } + + rmtTry(bin_MessageFooter(bin_buf, 
write_start_offset)); + + return RMT_ERROR_NONE; +} + +static rmtError Remotery_SendPropertySnapshot(Remotery* rmt, Message* message) +{ + Msg_PropertySnapshot* msg_snapshot = (Msg_PropertySnapshot*)message->payload; + + rmtError error = RMT_ERROR_NONE; + + Buffer* bin_buf; + + // Reset the buffer for sending a websocket message + bin_buf = rmt->server->bin_buf; + WebSocket_PrepareBuffer(bin_buf); + + // Serialise the message and send + error = Remotery_SerialisePropertySnapshots(bin_buf, msg_snapshot); + if (error == RMT_ERROR_NONE) + { + error = Remotery_SendToViewerAndLog(rmt, bin_buf, 50); + } + + FreePropertySnapshots(msg_snapshot->rootSnapshot); + + return error; +} + +static rmtError Remotery_ConsumeMessageQueue(Remotery* rmt) +{ + rmtU32 nb_messages_sent = 0; + const rmtU32 maxNbMessagesPerUpdate = g_Settings.maxNbMessagesPerUpdate; + + assert(rmt != NULL); + + // Loop reading the max number of messages for this update + // Note some messages don't consume the sent message count as they are small enough to not cause performance issues + while (nb_messages_sent < maxNbMessagesPerUpdate) + { + rmtError error = RMT_ERROR_NONE; + Message* message = rmtMessageQueue_PeekNextMessage(rmt->mq_to_rmt_thread); + if (message == NULL) + break; + + switch (message->id) + { + // This shouldn't be possible + case MsgID_NotReady: + assert(RMT_FALSE); + break; + + // Dispatch to message handler + case MsgID_AddToStringTable: + error = Remotery_AddToStringTable(rmt, message); + break; + case MsgID_LogText: + error = Remotery_SendLogTextMessage(rmt, message); + nb_messages_sent++; + break; + case MsgID_SampleTree: + rmt_BeginCPUSample(SendSampleTreeMessage, RMTSF_Aggregate); + error = Remotery_SendSampleTreeMessage(rmt, message); + nb_messages_sent++; + rmt_EndCPUSample(); + break; + case MsgID_ProcessorThreads: + Remotery_SendProcessorThreads(rmt, message); + nb_messages_sent++; + break; + case MsgID_PropertySnapshot: + error = Remotery_SendPropertySnapshot(rmt, message); + break; + + default: + break; + } + + // Consume the message before reacting to any errors + rmtMessageQueue_ConsumeNextMessage(rmt->mq_to_rmt_thread, message); + if (error != RMT_ERROR_NONE) + { + return error; + } + } + + return RMT_ERROR_NONE; +} + +static void Remotery_FlushMessageQueue(Remotery* rmt) +{ + assert(rmt != NULL); + + // Loop reading all remaining messages + for (;;) + { + Message* message = rmtMessageQueue_PeekNextMessage(rmt->mq_to_rmt_thread); + if (message == NULL) + break; + + switch (message->id) + { + // These can be safely ignored + case MsgID_NotReady: + case MsgID_AddToStringTable: + case MsgID_LogText: + break; + + // Release all samples back to their allocators + case MsgID_SampleTree: { + Msg_SampleTree* sample_tree = (Msg_SampleTree*)message->payload; + FreeSamples(sample_tree->rootSample, sample_tree->allocator); + break; + } + + case MsgID_PropertySnapshot: { + Msg_PropertySnapshot* msg_snapshot = (Msg_PropertySnapshot*)message->payload; + FreePropertySnapshots(msg_snapshot->rootSnapshot); + break; + } + + default: + break; + } + + rmtMessageQueue_ConsumeNextMessage(rmt->mq_to_rmt_thread, message); + } +} + +static void Remotery_MapMessageQueue(Remotery* rmt) +{ + rmtU32 read_pos, write_pos; + rmtMessageQueue* queue; + + assert(rmt != NULL); + + // Wait until the caller sets the custom data + while (LoadAcquirePointer((long* volatile*)&rmt->map_message_queue_data) == NULL) + msSleep(1); + + // Snapshot the current write position so that we're not constantly chasing other threads + // that can have 
no effect on the thread requesting the map. + queue = rmt->mq_to_rmt_thread; + write_pos = LoadAcquire(&queue->write_pos); + + // Walk every message in the queue and call the map function + read_pos = queue->read_pos; + while (read_pos < write_pos) + { + rmtU32 r = read_pos & (queue->size - 1); + Message* message = (Message*)(queue->data->ptr + r); + rmtU32 message_size = rmtMessageQueue_SizeForPayload(message->payload_size); + rmt->map_message_queue_fn(rmt, message); + read_pos += message_size; + } + + StoreReleasePointer((long* volatile*)&rmt->map_message_queue_data, NULL); +} + +static rmtError Remotery_ThreadMain(rmtThread* thread) +{ + Remotery* rmt = (Remotery*)thread->param; + assert(rmt != NULL); + + rmt_SetCurrentThreadName("Remotery"); + + while (thread->request_exit == RMT_FALSE) + { + rmt_BeginCPUSample(Wakeup, 0); + + rmt_BeginCPUSample(ServerUpdate, 0); + Server_Update(rmt->server); + rmt_EndCPUSample(); + + rmt_BeginCPUSample(ConsumeMessageQueue, 0); + Remotery_ConsumeMessageQueue(rmt); + rmt_EndCPUSample(); + + rmt_EndCPUSample(); + + // Process any queue map requests + if (LoadAcquirePointer((long* volatile*)&rmt->map_message_queue_fn) != NULL) + { + Remotery_MapMessageQueue(rmt); + StoreReleasePointer((long* volatile*)&rmt->map_message_queue_fn, NULL); + } + + // + // [NOTE-A] + // + // Possible sequence of user events at this point: + // + // 1. Add samples to the queue. + // 2. Shutdown remotery. + // + // This loop will exit with unrelease samples. + // + + msSleep(g_Settings.msSleepBetweenServerUpdates); + } + + // Release all samples to their allocators as a consequence of [NOTE-A] + Remotery_FlushMessageQueue(rmt); + + return RMT_ERROR_NONE; +} + +static rmtError Remotery_ReceiveMessage(void* context, char* message_data, rmtU32 message_length) +{ + Remotery* rmt = (Remotery*)context; + +// Manual dispatch on 4-byte message headers (message ID is little-endian encoded) +#define FOURCC(a, b, c, d) (rmtU32)(((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) + rmtU32 message_id = *(rmtU32*)message_data; + + switch (message_id) + { + case FOURCC('C', 'O', 'N', 'I'): { + rmt_LogText("Console message received..."); + rmt_LogText(message_data + 4); + + // Pass on to any registered handler + if (g_Settings.input_handler != NULL) + g_Settings.input_handler(message_data + 4, g_Settings.input_handler_context); + + break; + } + + case FOURCC('G', 'S', 'M', 'P'): { + rmtPStr name; + + // Convert name hash to integer + rmtU32 name_hash = 0; + const char* cur = message_data + 4; + const char* end = cur + message_length - 4; + while (cur < end) + name_hash = name_hash * 10 + *cur++ - '0'; + + // Search for a matching string hash + name = StringTable_Find(rmt->string_table, name_hash); + if (name != NULL) + { + rmtU32 name_length = (rmtU32)strnlen_s_safe_c(name, 256 - 12); + + // Construct a response message containing the matching name + Buffer* bin_buf = rmt->server->bin_buf; + WebSocket_PrepareBuffer(bin_buf); + bin_SampleName(bin_buf, name, name_hash, name_length); + + // Send back immediately as we're on the server thread + return Server_Send(rmt->server, bin_buf->data, bin_buf->bytes_used, 10); + } + + break; + } + } + +#undef FOURCC + + return RMT_ERROR_NONE; +} + +static rmtError Remotery_Constructor(Remotery* rmt) +{ + assert(rmt != NULL); + + // Set default state + rmt->server = NULL; + rmt->mq_to_rmt_thread = NULL; + rmt->thread = NULL; + rmt->string_table = NULL; + rmt->logfile = NULL; + rmt->map_message_queue_fn = NULL; + rmt->map_message_queue_data = NULL; + 
rmt->threadProfilers = NULL; + mtxInit(&rmt->propertyMutex); + rmt->propertyAllocator = NULL; + rmt->propertyFrame = 0; + + // Set default state on the root property + rmtProperty* root_property = &rmt->rootProperty; + root_property->initialised = RMT_TRUE; + root_property->type = RMT_PropertyType_rmtGroup; + root_property->value.Bool = RMT_FALSE; + root_property->flags = RMT_PropertyFlags_NoFlags; + root_property->name = "Root Property"; + root_property->description = ""; + root_property->defaultValue.Bool = RMT_FALSE; + root_property->parent = NULL; + root_property->firstChild = NULL; + root_property->lastChild = NULL; + root_property->nextSibling = NULL; + root_property->nameHash = 0; + root_property->uniqueID = 0; + +#if RMT_USE_CUDA + rmt->cuda.CtxSetCurrent = NULL; + rmt->cuda.EventCreate = NULL; + rmt->cuda.EventDestroy = NULL; + rmt->cuda.EventElapsedTime = NULL; + rmt->cuda.EventQuery = NULL; + rmt->cuda.EventRecord = NULL; +#endif + +#if RMT_USE_OPENGL + rmt->opengl = NULL; +#endif + +#if RMT_USE_METAL + rmt->metal = NULL; +#endif + +#if RMT_USE_D3D12 + mtxInit(&rmt->d3d12BindsMutex); + rmt->d3d12Binds = NULL; +#endif + +#if RMT_USE_VULKAN + mtxInit(&rmt->vulkanBindsMutex); + rmt->vulkanBinds = NULL; +#endif + + // Kick-off the timer + usTimer_Init(&rmt->timer); + + // Create the server + rmtTryNew(Server, rmt->server, g_Settings.port, g_Settings.reuse_open_port, g_Settings.limit_connections_to_localhost); + + // Setup incoming message handler + rmt->server->receive_handler = Remotery_ReceiveMessage; + rmt->server->receive_handler_context = rmt; + + // Create the main message thread with only one page + rmtTryNew(rmtMessageQueue, rmt->mq_to_rmt_thread, g_Settings.messageQueueSizeInBytes); + + // Create sample name string table + rmtTryNew(StringTable, rmt->string_table); + + if (g_Settings.logPath != NULL) + { + // Get current date/time + struct tm* now_tm = TimeDateNow(); + + // Start the log path off + char filename[512] = { 0 }; + strncat_s(filename, sizeof(filename), g_Settings.logPath, 512); + strncat_s(filename, sizeof(filename), "/remotery-log-", 14); + + // Append current date and time + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_year + 1900), 11); + strncat_s(filename, sizeof(filename), "-", 1); + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_mon + 1), 11); + strncat_s(filename, sizeof(filename), "-", 1); + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_mday), 11); + strncat_s(filename, sizeof(filename), "-", 1); + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_hour), 11); + strncat_s(filename, sizeof(filename), "-", 1); + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_min), 11); + strncat_s(filename, sizeof(filename), "-", 1); + strncat_s(filename, sizeof(filename), itoa_s(now_tm->tm_sec), 11); + + // Just append a custom extension + strncat_s(filename, sizeof(filename), ".rbin", 5); + + // Open and assume any failure simply sets NULL and the file isn't written + rmt->logfile = rmtOpenFile(filename, "w"); + + // Write the header + if (rmt->logfile != NULL) + { + rmtWriteFile(rmt->logfile, "RMTBLOGF", 8); + } + } + +#if RMT_USE_OPENGL + rmtTry(OpenGL_Create(&rmt->opengl)); +#endif + +#if RMT_USE_METAL + rmtTry(Metal_Create(&rmt->metal)); +#endif + + // Create the thread profilers container + rmtTryNew(ThreadProfilers, rmt->threadProfilers, &rmt->timer, rmt->mq_to_rmt_thread); + + // Create the property state allocator + rmtTryNew(ObjectAllocator, rmt->propertyAllocator, sizeof(PropertySnapshot), 
(ObjConstructor)PropertySnapshot_Constructor, (ObjDestructor)PropertySnapshot_Destructor); + + // Set as the global instance before creating any threads that uses it for sampling itself + assert(g_Remotery == NULL); + g_Remotery = rmt; + g_RemoteryCreated = RMT_TRUE; + g_Remotery->countThreads = 0; + + // Ensure global instance writes complete before other threads get a chance to use it + CompilerWriteFence(); + + // Create the main update thread once everything has been defined for the global remotery object + rmtTryNew(rmtThread, rmt->thread, Remotery_ThreadMain, rmt); + + return RMT_ERROR_NONE; +} + +static void Remotery_Destructor(Remotery* rmt) +{ + assert(rmt != NULL); + +#if RMT_USE_VULKAN + while (rmt->vulkanBinds != NULL) + { + _rmt_UnbindVulkan((rmtVulkanBind*)rmt->vulkanBinds); + } + mtxDelete(&rmt->vulkanBindsMutex); +#endif + + // Join the remotery thread before clearing the global object as the thread is profiling itself + rmtDelete(rmtThread, rmt->thread); + + rmtDelete(ThreadProfilers, rmt->threadProfilers); + + rmtDelete(ObjectAllocator, rmt->propertyAllocator); + +#if RMT_USE_D3D12 + while (rmt->d3d12Binds != NULL) + { + _rmt_UnbindD3D12((rmtD3D12Bind*)rmt->d3d12Binds); + } + mtxDelete(&rmt->d3d12BindsMutex); +#endif + +#if RMT_USE_OPENGL + rmtDelete(OpenGL, rmt->opengl); +#endif + +#if RMT_USE_METAL + rmtDelete(Metal, rmt->metal); +#endif + + if (g_RemoteryCreated) + { + g_Remotery = NULL; + g_RemoteryCreated = RMT_FALSE; + } + + rmtCloseFile(rmt->logfile); + + rmtDelete(StringTable, rmt->string_table); + rmtDelete(rmtMessageQueue, rmt->mq_to_rmt_thread); + + rmtDelete(Server, rmt->server); + + // Free the error message TLS + // TODO(don): The allocated messages will need to be freed as well + if (g_lastErrorMessageTlsHandle != TLS_INVALID_HANDLE) + { + tlsFree(g_lastErrorMessageTlsHandle); + g_lastErrorMessageTlsHandle = TLS_INVALID_HANDLE; + } + + mtxDelete(&rmt->propertyMutex); +} + +static void* CRTMalloc(void* mm_context, rmtU32 size) +{ + RMT_UNREFERENCED_PARAMETER(mm_context); + return malloc((size_t)size); +} + +static void CRTFree(void* mm_context, void* ptr) +{ + RMT_UNREFERENCED_PARAMETER(mm_context); + free(ptr); +} + +static void* CRTRealloc(void* mm_context, void* ptr, rmtU32 size) +{ + RMT_UNREFERENCED_PARAMETER(mm_context); + return realloc(ptr, size); +} + +RMT_API rmtSettings* _rmt_Settings(void) +{ + // Default-initialize on first call + if (g_SettingsInitialized == RMT_FALSE) + { + g_Settings.port = 0x4597; + g_Settings.reuse_open_port = RMT_FALSE; + g_Settings.limit_connections_to_localhost = RMT_FALSE; + g_Settings.enableThreadSampler = RMT_TRUE; + g_Settings.msSleepBetweenServerUpdates = 4; + g_Settings.messageQueueSizeInBytes = 1024 * 1024; + g_Settings.maxNbMessagesPerUpdate = 1000; + g_Settings.malloc = CRTMalloc; + g_Settings.free = CRTFree; + g_Settings.realloc = CRTRealloc; + g_Settings.input_handler = NULL; + g_Settings.input_handler_context = NULL; + g_Settings.logPath = NULL; + g_Settings.sampletree_handler = NULL; + g_Settings.sampletree_context = NULL; + g_Settings.snapshot_callback = NULL; + g_Settings.snapshot_context = NULL; + + g_SettingsInitialized = RMT_TRUE; + } + + return &g_Settings; +} + +RMT_API rmtError _rmt_CreateGlobalInstance(Remotery** remotery) +{ + // Ensure load/acquire store/release operations match this enum size + assert(sizeof(MessageID) == sizeof(rmtU32)); + + // Default-initialise if user has not set values + rmt_Settings(); + + // Creating the Remotery instance also records it as the global instance + 
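+    // Typical usage through the public API wrappers (illustrative):
+    //
+    //     Remotery* rmt;
+    //     if (rmt_CreateGlobalInstance(&rmt) != RMT_ERROR_NONE)
+    //     {
+    //         // profiling unavailable
+    //     }
+    //     ...
+    //     rmt_DestroyGlobalInstance(rmt);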
assert(remotery != NULL); + rmtTryNew(Remotery, *remotery); + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_DestroyGlobalInstance(Remotery* remotery) +{ + // Ensure this is the module that created it + assert(g_RemoteryCreated == RMT_TRUE); + assert(g_Remotery == remotery); + rmtDelete(Remotery, remotery); +} + +RMT_API void _rmt_SetGlobalInstance(Remotery* remotery) +{ + // Default-initialise if user has not set values + rmt_Settings(); + + g_Remotery = remotery; +} + +RMT_API Remotery* _rmt_GetGlobalInstance(void) +{ + return g_Remotery; +} + +#ifdef RMT_PLATFORM_WINDOWS +#pragma pack(push, 8) +typedef struct tagTHREADNAME_INFO +{ + DWORD dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + DWORD dwThreadID; // Thread ID (-1=caller thread). + DWORD dwFlags; // Reserved for future use, must be zero. +} THREADNAME_INFO; +#pragma pack(pop) +#endif + +wchar_t* MakeWideString(const char* string) +{ + size_t wlen; + wchar_t* wstr; + + // First get the converted length +#if defined(RMT_PLATFORM_WINDOWS) && !RMT_USE_TINYCRT + if (mbstowcs_s(&wlen, NULL, 0, string, INT_MAX) != 0) + { + return NULL; + } +#else + wlen = mbstowcs(NULL, string, 256); +#endif + + // Allocate enough words for the converted result + wstr = (wchar_t*)(rmtMalloc((wlen + 1) * sizeof(wchar_t))); + if (wstr == NULL) + { + return NULL; + } + + // Convert +#if defined(RMT_PLATFORM_WINDOWS) && !RMT_USE_TINYCRT + if (mbstowcs_s(&wlen, wstr, wlen + 1, string, wlen) != 0) +#else + if (mbstowcs(wstr, string, wlen + 1) != wlen) +#endif + { + rmtFree(wstr); + return NULL; + } + + return wstr; +} + +static void SetDebuggerThreadName(const char* name) +{ +#ifdef RMT_PLATFORM_WINDOWS + THREADNAME_INFO info; + + // See if SetThreadDescription is available in this version of Windows + // Introduced in Windows 10 build 1607 + HMODULE kernel32 = GetModuleHandleA("Kernel32.dll"); + if (kernel32 != NULL) + { + typedef HRESULT(WINAPI* SETTHREADDESCRIPTION)(HANDLE hThread, PCWSTR lpThreadDescription); + SETTHREADDESCRIPTION SetThreadDescription = (SETTHREADDESCRIPTION)GetProcAddress(kernel32, "SetThreadDescription"); + if (SetThreadDescription != NULL) + { + // Create a wide-string version of the thread name + wchar_t* wstr = MakeWideString(name); + if (wstr != NULL) + { + // Set and return, leaving a fall-through for any failure cases to use the old exception method + SetThreadDescription(GetCurrentThread(), wstr); + rmtFree(wstr); + return; + } + } + } + + info.dwType = 0x1000; + info.szName = name; + info.dwThreadID = (DWORD)-1; + info.dwFlags = 0; + +#ifndef __MINGW32__ + __try + { + RaiseException(0x406D1388, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info); + } + __except (1 /* EXCEPTION_EXECUTE_HANDLER */) + { + } +#endif +#elif defined(RMT_PLATFORM_MACOS) + pthread_setname_np(name); +#else + RMT_UNREFERENCED_PARAMETER(name); +#endif + +#ifdef RMT_PLATFORM_LINUX + // pthread_setname_np is a non-standard GNU extension. 
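+    // Linux stores thread names in the task comm field, which is limited to 15 characters plus the null terminator
+    // (TASK_COMM_LEN = 16), so clamp the name before handing it to the kernel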
+ char name_clamp[16]; + name_clamp[0] = 0; + strncat_s(name_clamp, sizeof(name_clamp), name, 15); +#if defined(__FreeBSD__) || defined(__OpenBSD__) + pthread_set_name_np(pthread_self(), name_clamp); +#else + prctl(PR_SET_NAME, name_clamp, 0, 0, 0); +#endif +#endif +} + +RMT_API void _rmt_SetCurrentThreadName(rmtPStr thread_name) +{ + ThreadProfiler* thread_profiler; + rmtU32 name_length; + + if (g_Remotery == NULL) + { + return; + } + + // Get data for this thread + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) != RMT_ERROR_NONE) + { + return; + } + + // Copy name and apply to the debugger + strcpy_s(thread_profiler->threadName, sizeof(thread_profiler->threadName), thread_name); + thread_profiler->threadNameHash = _rmt_HashString32(thread_name, strnlen_s(thread_name, 64), 0); + SetDebuggerThreadName(thread_name); + + // Send the thread name for lookup +#if defined(RMT_PLATFORM_WINDOWS) || defined(RMT_PLATFORM_MACOS) + name_length = strnlen_s(thread_profiler->threadName, 64); + QueueAddToStringTable(g_Remotery->mq_to_rmt_thread, thread_profiler->threadNameHash, thread_name, name_length, NULL); +#endif +} + +static rmtBool QueueLine(rmtMessageQueue* queue, unsigned char* text, rmtU32 size, struct ThreadProfiler* thread_profiler) +{ + Message* message; + rmtU32 text_size; + + assert(queue != NULL); + + // Patch line size + text_size = size - 4; + U32ToByteArray(text, text_size); + + // Allocate some space for the line + message = rmtMessageQueue_AllocMessage(queue, size, thread_profiler); + if (message == NULL) + return RMT_FALSE; + + // Copy the text and commit the message + memcpy(message->payload, text, size); + rmtMessageQueue_CommitMessage(message, MsgID_LogText); + + return RMT_TRUE; +} + +RMT_API void _rmt_LogText(rmtPStr text) +{ + int start_offset, offset, i; + unsigned char line_buffer[1024] = {0}; + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) != RMT_ERROR_NONE) + { + return; + } + + // Start with empty line size + // Fill with spaces to enable viewing line_buffer without offset in a debugger + // (will be overwritten later by QueueLine/rmtMessageQueue_AllocMessage) + line_buffer[0] = ' '; + line_buffer[1] = ' '; + line_buffer[2] = ' '; + line_buffer[3] = ' '; + start_offset = 4; + + // There might be newlines in the buffer, so split them into multiple network calls + offset = start_offset; + for (i = 0; text[i] != 0; i++) + { + char c = text[i]; + + // Line wrap when too long or newline encountered + if (offset == sizeof(line_buffer) - 1 || c == '\n') + { + // Send the line up to now + if (QueueLine(g_Remotery->mq_to_rmt_thread, line_buffer, offset, thread_profiler) == RMT_FALSE) + return; + + // Restart line + offset = start_offset; + + // Don't add the newline character (if this was the reason for the flush) + // to the restarted line_buffer, let's skip it + if (c == '\n') + continue; + } + + line_buffer[offset++] = c; + } + + // Send the last line + if (offset > start_offset) + { + assert(offset < (int)sizeof(line_buffer)); + QueueLine(g_Remotery->mq_to_rmt_thread, line_buffer, offset, thread_profiler); + } +} + +RMT_API void _rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache) +{ + // 'hash_cache' stores a pointer to a sample name's hash value. Internally this is used to identify unique + // callstacks and it would be ideal that it's not recalculated each time the sample is used. 
This can be statically + // cached at the point of call or stored elsewhere when dynamic names are required. + // + // If 'hash_cache' is NULL then this call becomes more expensive, as it has to recalculate the hash of the name. + + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + // TODO: Time how long the bits outside here cost and subtract them from the parent + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + if (ThreadProfiler_Push(thread_profiler->sampleTrees[RMT_SampleType_CPU], name_hash, flags, &sample) == RMT_ERROR_NONE) + { + // If this is an aggregate sample, store the time in 'end' as we want to preserve 'start' + if (sample->call_count > 1) + sample->us_end = usTimer_Get(&g_Remotery->timer); + else + sample->us_start = usTimer_Get(&g_Remotery->timer); + } + } +} + +RMT_API void _rmt_EndCPUSample(void) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + Sample* sample = thread_profiler->sampleTrees[RMT_SampleType_CPU]->currentParent; + + if (sample->recurse_depth > 0) + { + sample->recurse_depth--; + } + else + { + rmtU64 us_end = usTimer_Get(&g_Remotery->timer); + Sample_Close(sample, us_end); + ThreadProfiler_Pop(thread_profiler, g_Remotery->mq_to_rmt_thread, sample, 0); + } + } +} + +#if RMT_USE_D3D12 +static rmtError D3D12MarkFrame(struct D3D12BindImpl* bind); +#endif + +#if RMT_USE_VULKAN +static rmtError VulkanMarkFrame(struct VulkanBindImpl* bind, rmtBool recurse); +#endif + +RMT_API rmtError _rmt_MarkFrame(void) +{ + if (g_Remotery == NULL) + { + return RMT_ERROR_REMOTERY_NOT_CREATED; + } + + #if RMT_USE_D3D12 + // This will kick off mark frames on the complete chain of binds + rmtTry(D3D12MarkFrame(g_Remotery->d3d12Binds)); + #endif + + #if RMT_USE_VULKAN + // This will kick off mark frames on the complete chain of binds + rmtTry(VulkanMarkFrame(g_Remotery->vulkanBinds, RMT_TRUE)); + #endif + + return RMT_ERROR_NONE; +} + +#if RMT_USE_OPENGL || RMT_USE_D3D11 || RMT_USE_D3D12 || RMT_USE_VULKAN +static void Remotery_DeleteSampleTree(Remotery* rmt, enum rmtSampleType sample_type) +{ + ThreadProfiler* thread_profiler; + + // Get the attached thread sampler and delete the sample tree + assert(rmt != NULL); + if (ThreadProfilers_GetCurrentThreadProfiler(rmt->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + SampleTree* sample_tree = thread_profiler->sampleTrees[sample_type]; + if (sample_tree != NULL) + { + rmtDelete(SampleTree, sample_tree); + thread_profiler->sampleTrees[sample_type] = NULL; + } + } +} + +static rmtBool rmtMessageQueue_IsEmpty(rmtMessageQueue* queue) +{ + assert(queue != NULL); + return queue->write_pos - queue->read_pos == 0; +} + +typedef struct +{ + rmtSampleType sample_type; + Buffer* flush_samples; +} GatherQueuedSampleData; + +static void MapMessageQueueAndWait(Remotery* rmt, void (*map_message_queue_fn)(Remotery* rmt, Message*), void* data) +{ + // Basic spin lock on the map function itself + while (AtomicCompareAndSwapPointer((rmtAtomicVoidPtr*)&rmt->map_message_queue_fn, NULL, + (long*)map_message_queue_fn) == RMT_FALSE) + msSleep(1); + + StoreReleasePointer((long* volatile*)&rmt->map_message_queue_data, (long*)data); + + // Wait until map completes + while 
(LoadAcquirePointer((long* volatile*)&rmt->map_message_queue_fn) != NULL) + msSleep(1); +} + +static void GatherQueuedSamples(Remotery* rmt, Message* message) +{ + GatherQueuedSampleData* gather_data = (GatherQueuedSampleData*)rmt->map_message_queue_data; + + // Filter sample trees + if (message->id == MsgID_SampleTree) + { + Msg_SampleTree* sample_tree = (Msg_SampleTree*)message->payload; + Sample* sample = sample_tree->rootSample; + if (sample->type == gather_data->sample_type) + { + // Make a copy of the entire sample tree as the remotery thread may overwrite it while + // the calling thread tries to delete + rmtU32 message_size = rmtMessageQueue_SizeForPayload(message->payload_size); + Buffer_Write(gather_data->flush_samples, message, message_size); + + // Mark the message empty + message->id = MsgID_None; + } + } +} + +static void FreePendingSampleTrees(Remotery* rmt, rmtSampleType sample_type, Buffer* flush_samples) +{ + rmtU8* data; + rmtU8* data_end; + + // Gather all sample trees currently queued for the Remotery thread + GatherQueuedSampleData gather_data; + gather_data.sample_type = sample_type; + gather_data.flush_samples = flush_samples; + MapMessageQueueAndWait(rmt, GatherQueuedSamples, &gather_data); + + // Release all sample trees to their allocators + data = flush_samples->data; + data_end = data + flush_samples->bytes_used; + while (data < data_end) + { + Message* message = (Message*)data; + rmtU32 message_size = rmtMessageQueue_SizeForPayload(message->payload_size); + Msg_SampleTree* sample_tree = (Msg_SampleTree*)message->payload; + FreeSamples(sample_tree->rootSample, sample_tree->allocator); + data += message_size; + } +} + +#endif + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @CUDA: CUDA event sampling +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#if RMT_USE_CUDA + +typedef struct CUDASample +{ + // IS-A inheritance relationship + Sample base; + + // Pair of events that wrap the sample + CUevent event_start; + CUevent event_end; + +} CUDASample; + +static rmtError MapCUDAResult(CUresult result) +{ + switch (result) + { + case CUDA_SUCCESS: + return RMT_ERROR_NONE; + case CUDA_ERROR_DEINITIALIZED: + return RMT_ERROR_CUDA_DEINITIALIZED; + case CUDA_ERROR_NOT_INITIALIZED: + return RMT_ERROR_CUDA_NOT_INITIALIZED; + case CUDA_ERROR_INVALID_CONTEXT: + return RMT_ERROR_CUDA_INVALID_CONTEXT; + case CUDA_ERROR_INVALID_VALUE: + return RMT_ERROR_CUDA_INVALID_VALUE; + case CUDA_ERROR_INVALID_HANDLE: + return RMT_ERROR_CUDA_INVALID_HANDLE; + case CUDA_ERROR_OUT_OF_MEMORY: + return RMT_ERROR_CUDA_OUT_OF_MEMORY; + case CUDA_ERROR_NOT_READY: + return RMT_ERROR_ERROR_NOT_READY; + default: + return RMT_ERROR_CUDA_UNKNOWN; + } +} + +#define CUDA_MAKE_FUNCTION(name, params) \ + typedef CUresult(CUDAAPI* name##Ptr) params; \ + name##Ptr name = (name##Ptr)g_Remotery->cuda.name; + +#define CUDA_GUARD(call) \ + { \ + rmtError error = call; \ + if (error != RMT_ERROR_NONE) \ + return error; \ + } + +// Wrappers around CUDA driver functions that manage the active context. 
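+// None of the driver entry points below are linked directly: the user supplies them through rmt_BindCUDA() as a
+// rmtCUDABind struct of function pointers (e.g. cuCtxSetCurrent, cuEventCreate), so Remotery has no link-time
+// dependency on the CUDA driver library.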
+static rmtError CUDASetContext(void* context)
+{
+    CUDA_MAKE_FUNCTION(CtxSetCurrent, (CUcontext ctx));
+    assert(CtxSetCurrent != NULL);
+    return MapCUDAResult(CtxSetCurrent((CUcontext)context));
+}
+static rmtError CUDAGetContext(void** context)
+{
+    CUDA_MAKE_FUNCTION(CtxGetCurrent, (CUcontext * ctx));
+    assert(CtxGetCurrent != NULL);
+    return MapCUDAResult(CtxGetCurrent((CUcontext*)context));
+}
+static rmtError CUDAEnsureContext(void)
+{
+    void* current_context;
+    CUDA_GUARD(CUDAGetContext(&current_context));
+
+    assert(g_Remotery != NULL);
+    if (current_context != g_Remotery->cuda.context)
+        CUDA_GUARD(CUDASetContext(g_Remotery->cuda.context));
+
+    return RMT_ERROR_NONE;
+}
+
+// Wrappers around CUDA driver functions that manage events
+static rmtError CUDAEventCreate(CUevent* phEvent, unsigned int Flags)
+{
+    CUDA_MAKE_FUNCTION(EventCreate, (CUevent * phEvent, unsigned int Flags));
+    CUDA_GUARD(CUDAEnsureContext());
+    return MapCUDAResult(EventCreate(phEvent, Flags));
+}
+static rmtError CUDAEventDestroy(CUevent hEvent)
+{
+    CUDA_MAKE_FUNCTION(EventDestroy, (CUevent hEvent));
+    CUDA_GUARD(CUDAEnsureContext());
+    return MapCUDAResult(EventDestroy(hEvent));
+}
+static rmtError CUDAEventRecord(CUevent hEvent, void* hStream)
+{
+    CUDA_MAKE_FUNCTION(EventRecord, (CUevent hEvent, CUstream hStream));
+    CUDA_GUARD(CUDAEnsureContext());
+    return MapCUDAResult(EventRecord(hEvent, (CUstream)hStream));
+}
+static rmtError CUDAEventQuery(CUevent hEvent)
+{
+    CUDA_MAKE_FUNCTION(EventQuery, (CUevent hEvent));
+    CUDA_GUARD(CUDAEnsureContext());
+    return MapCUDAResult(EventQuery(hEvent));
+}
+static rmtError CUDAEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd)
+{
+    CUDA_MAKE_FUNCTION(EventElapsedTime, (float* pMilliseconds, CUevent hStart, CUevent hEnd));
+    CUDA_GUARD(CUDAEnsureContext());
+    return MapCUDAResult(EventElapsedTime(pMilliseconds, hStart, hEnd));
+}
+
+static rmtError CUDASample_Constructor(CUDASample* sample)
+{
+    rmtError error;
+
+    assert(sample != NULL);
+
+    // Chain to sample constructor
+    Sample_Constructor((Sample*)sample);
+    sample->base.type = RMT_SampleType_CUDA;
+    sample->event_start = NULL;
+    sample->event_end = NULL;
+
+    // Create non-blocking events with timing
+    assert(g_Remotery != NULL);
+    error = CUDAEventCreate(&sample->event_start, CU_EVENT_DEFAULT);
+    if (error == RMT_ERROR_NONE)
+        error = CUDAEventCreate(&sample->event_end, CU_EVENT_DEFAULT);
+    return error;
+}
+
+static void CUDASample_Destructor(CUDASample* sample)
+{
+    assert(sample != NULL);
+
+    // Destroy events
+    if (sample->event_start != NULL)
+        CUDAEventDestroy(sample->event_start);
+    if (sample->event_end != NULL)
+        CUDAEventDestroy(sample->event_end);
+
+    Sample_Destructor((Sample*)sample);
+}
+
+static rmtBool AreCUDASamplesReady(Sample* sample)
+{
+    rmtError error;
+    Sample* child;
+
+    CUDASample* cuda_sample = (CUDASample*)sample;
+    assert(sample->type == RMT_SampleType_CUDA);
+
+    // Check to see if both of the CUDA events have been processed
+    error = CUDAEventQuery(cuda_sample->event_start);
+    if (error != RMT_ERROR_NONE)
+        return RMT_FALSE;
+    error = CUDAEventQuery(cuda_sample->event_end);
+    if (error != RMT_ERROR_NONE)
+        return RMT_FALSE;
+
+    // Check child sample events
+    for (child = sample->first_child; child != NULL; child = child->next_sibling)
+    {
+        if (!AreCUDASamplesReady(child))
+            return RMT_FALSE;
+    }
+
+    return RMT_TRUE;
+}
+
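+// A CUDA event timestamp has no meaning in isolation; the driver only exposes the elapsed time between two recorded
+// events. All sample times are therefore measured relative to the start event recorded once on the root sample, then
+// converted from milliseconds to microseconds.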
+static rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample)
+{
+    Sample* child;
+
+    CUDASample* cuda_root_sample = (CUDASample*)root_sample;
+    CUDASample* cuda_sample = (CUDASample*)sample;
+
+    float ms_start, ms_end;
+
+    assert(root_sample != NULL);
+    assert(sample != NULL);
+
+    // Get millisecond timing of each sample event, relative to initial root sample
+    if (CUDAEventElapsedTime(&ms_start, cuda_root_sample->event_start, cuda_sample->event_start) != RMT_ERROR_NONE)
+        return RMT_FALSE;
+    if (CUDAEventElapsedTime(&ms_end, cuda_root_sample->event_start, cuda_sample->event_end) != RMT_ERROR_NONE)
+        return RMT_FALSE;
+
+    // Convert to microseconds and add to the sample
+    sample->us_start = (rmtU64)(ms_start * 1000);
+    sample->us_end = (rmtU64)(ms_end * 1000);
+    sample->us_length = sample->us_end - sample->us_start;
+
+    // Get child sample times
+    for (child = sample->first_child; child != NULL; child = child->next_sibling)
+    {
+        if (!GetCUDASampleTimes(root_sample, child))
+            return RMT_FALSE;
+    }
+
+    return RMT_TRUE;
+}
+
+RMT_API void _rmt_BindCUDA(const rmtCUDABind* bind)
+{
+    assert(bind != NULL);
+    if (g_Remotery != NULL)
+        g_Remotery->cuda = *bind;
+}
+
+RMT_API void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream)
+{
+    ThreadProfiler* thread_profiler;
+
+    if (g_Remotery == NULL)
+        return;
+
+    if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE)
+    {
+        rmtError error;
+        Sample* sample;
+        rmtU32 name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache);
+
+        // Create the CUDA tree on-demand as the tree needs an up-front-created root.
+        // This is not possible to create on initialisation as a CUDA binding is not yet available.
+        SampleTree** cuda_tree = &thread_profiler->sampleTrees[RMT_SampleType_CUDA];
+        if (*cuda_tree == NULL)
+        {
+            CUDASample* root_sample;
+
+            // rmtTryNew handles allocation failure internally
+            rmtTryNew(SampleTree, *cuda_tree, sizeof(CUDASample), (ObjConstructor)CUDASample_Constructor,
+                      (ObjDestructor)CUDASample_Destructor);
+
+            // Record an event once on the root sample, used to measure absolute sample
+            // times since this point
+            root_sample = (CUDASample*)(*cuda_tree)->root;
+            error = CUDAEventRecord(root_sample->event_start, stream);
+            if (error != RMT_ERROR_NONE)
+                return;
+        }
+
+        // Push the sample and record its event
+        if (ThreadProfiler_Push(*cuda_tree, name_hash, 0, &sample) == RMT_ERROR_NONE)
+        {
+            CUDASample* cuda_sample = (CUDASample*)sample;
+            cuda_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer);
+            CUDAEventRecord(cuda_sample->event_start, stream);
+        }
+    }
+}
+
+RMT_API void _rmt_EndCUDASample(void* stream)
+{
+    ThreadProfiler* thread_profiler;
+
+    if (g_Remotery == NULL)
+        return;
+
+    if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE)
+    {
+        CUDASample* sample = (CUDASample*)thread_profiler->sampleTrees[RMT_SampleType_CUDA]->currentParent;
+        if (sample->base.recurse_depth > 0)
+        {
+            sample->base.recurse_depth--;
+        }
+        else
+        {
+            CUDAEventRecord(sample->event_end, stream);
+            ThreadProfiler_Pop(thread_profiler, g_Remotery->mq_to_rmt_thread, (Sample*)sample, 0);
+        }
+    }
+}
+
+#endif // RMT_USE_CUDA
+
+/*
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+ @D3D11: Direct3D 11 event sampling
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------ +*/ + +#if RMT_USE_D3D11 + +// As clReflect has no way of disabling C++ compile mode, this forces C interfaces everywhere... +#define CINTERFACE + +// ...unfortunately these C++ helpers aren't wrapped by the same macro but they can be disabled individually +#define D3D11_NO_HELPERS + +// Allow use of the D3D11 helper macros for accessing the C-style vtable +#define COBJMACROS + +#ifdef _MSC_VER +// Disable for d3d11.h +// warning C4201: nonstandard extension used : nameless struct/union +#pragma warning(push) +#pragma warning(disable : 4201) +#endif + +#include <d3d11.h> + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +typedef struct D3D11 +{ + // Context set by user + ID3D11Device* device; + ID3D11DeviceContext* context; + + HRESULT last_error; + + // Queue to the D3D 11 main update thread + // Given that BeginSample/EndSample need to be called from the same thread that does the update, there + // is really no need for this to be a thread-safe queue. I'm using it for its convenience. + rmtMessageQueue* mq_to_d3d11_main; + + // Mark the first time so that remaining timestamps are offset from this + rmtU64 first_timestamp; + // Last time in us (CPU time, via usTimer_Get) since we last resync'ed CPU & GPU + rmtU64 last_resync; + + // Sample trees in transit in the message queue for release on shutdown + Buffer* flush_samples; +} D3D11; + +static rmtError D3D11_Create(D3D11** d3d11) +{ + assert(d3d11 != NULL); + + // Allocate space for the D3D11 data + rmtTryMalloc(D3D11, *d3d11); + + // Set defaults + (*d3d11)->device = NULL; + (*d3d11)->context = NULL; + (*d3d11)->last_error = S_OK; + (*d3d11)->mq_to_d3d11_main = NULL; + (*d3d11)->first_timestamp = 0; + (*d3d11)->last_resync = 0; + (*d3d11)->flush_samples = NULL; + + rmtTryNew(rmtMessageQueue, (*d3d11)->mq_to_d3d11_main, g_Settings.messageQueueSizeInBytes); + rmtTryNew(Buffer, (*d3d11)->flush_samples, 8 * 1024); + + return RMT_ERROR_NONE; +} + +static void D3D11_Destructor(D3D11* d3d11) +{ + assert(d3d11 != NULL); + rmtDelete(Buffer, d3d11->flush_samples); + rmtDelete(rmtMessageQueue, d3d11->mq_to_d3d11_main); +} + +static HRESULT rmtD3D11Finish(ID3D11Device* device, ID3D11DeviceContext* context, rmtU64* out_timestamp, + double* out_frequency) +{ + HRESULT result; + ID3D11Query* full_stall_fence; + ID3D11Query* query_disjoint; + D3D11_QUERY_DESC query_desc; + D3D11_QUERY_DESC disjoint_desc; + UINT64 timestamp; + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + + query_desc.Query = D3D11_QUERY_TIMESTAMP; + query_desc.MiscFlags = 0; + result = ID3D11Device_CreateQuery(device, &query_desc, &full_stall_fence); + if (result != S_OK) + return result; + + disjoint_desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + disjoint_desc.MiscFlags = 0; + result = ID3D11Device_CreateQuery(device, &disjoint_desc, &query_disjoint); + if (result != S_OK) + { + ID3D11Query_Release(full_stall_fence); + return result; + } + + ID3D11DeviceContext_Begin(context, (ID3D11Asynchronous*)query_disjoint); + ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)full_stall_fence); + ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)query_disjoint); + + result = S_FALSE; + + while (result == S_FALSE) + { + result = + ID3D11DeviceContext_GetData(context, (ID3D11Asynchronous*)query_disjoint, &disjoint, sizeof(disjoint), 0); + if (result != S_OK && result != S_FALSE) + { + ID3D11Query_Release(full_stall_fence); + ID3D11Query_Release(query_disjoint); + return result;
+ } + if (result == S_OK) + { + result = ID3D11DeviceContext_GetData(context, (ID3D11Asynchronous*)full_stall_fence, &timestamp, + sizeof(timestamp), 0); + if (result != S_OK && result != S_FALSE) + { + ID3D11Query_Release(full_stall_fence); + ID3D11Query_Release(query_disjoint); + return result; + } + } + // Give HyperThreading threads a breath on this spinlock. + YieldProcessor(); + } + + if (disjoint.Disjoint == FALSE) + { + double frequency = disjoint.Frequency / 1000000.0; + *out_timestamp = timestamp; + *out_frequency = frequency; + } + else + { + result = S_FALSE; + } + + ID3D11Query_Release(full_stall_fence); + ID3D11Query_Release(query_disjoint); + return result; +} + +static HRESULT SyncD3D11CpuGpuTimes(ID3D11Device* device, ID3D11DeviceContext* context, rmtU64* out_first_timestamp, + rmtU64* out_last_resync) +{ + rmtU64 cpu_time_start = 0; + rmtU64 cpu_time_stop = 0; + rmtU64 average_half_RTT = 0; // RTT = Roundtrip Time. + UINT64 gpu_base = 0; + double frequency = 1; + int i; + + HRESULT result; + result = rmtD3D11Finish(device, context, &gpu_base, &frequency); + if (result != S_OK && result != S_FALSE) + return result; + + for (i = 0; i < RMT_GPU_CPU_SYNC_NUM_ITERATIONS; ++i) + { + rmtU64 half_RTT; + cpu_time_start = usTimer_Get(&g_Remotery->timer); + result = rmtD3D11Finish(device, context, &gpu_base, &frequency); + cpu_time_stop = usTimer_Get(&g_Remotery->timer); + + if (result != S_OK && result != S_FALSE) + return result; + + // Ignore attempts where there was a disjoint, since there would + // be a lot of noise in those readings for measuring the RTT + if (result == S_OK) + { + // Average the time it takes a roundtrip from CPU to GPU + // while doing nothing other than getting timestamps + half_RTT = (cpu_time_stop - cpu_time_start) >> 1ULL; + if (i == 0) + average_half_RTT = half_RTT; + else + average_half_RTT = (average_half_RTT + half_RTT) >> 1ULL; + } + } + + // All GPU times are offset from gpu_base, and then taken to + // the same relative origin CPU timestamps are based on. + // CPU is in us, we must translate it to ns. + *out_first_timestamp = gpu_base - (rmtU64)((cpu_time_start + average_half_RTT) * frequency); + *out_last_resync = cpu_time_stop; + + return result; +} + +typedef struct D3D11Timestamp +{ + // Inherit so that timestamps can be quickly allocated + ObjectLink Link; + + // Pair of timestamp queries that wrap the sample + ID3D11Query* query_start; + ID3D11Query* query_end; + + // A disjoint to measure frequency/stability + // TODO: Does *each* sample need one of these?
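+ // (Possibly not: a single disjoint per frame would also work, at the cost of sharing one frequency/stability reading across every sample in that frame.)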
+ ID3D11Query* query_disjoint; + + rmtU64 cpu_timestamp; +} D3D11Timestamp; + +static rmtError D3D11Timestamp_Constructor(D3D11Timestamp* stamp) +{ + ThreadProfiler* thread_profiler; + D3D11_QUERY_DESC timestamp_desc; + D3D11_QUERY_DESC disjoint_desc; + ID3D11Device* device; + HRESULT* last_error; + rmtError rmt_error; + + assert(stamp != NULL); + + ObjectLink_Constructor((ObjectLink*)stamp); + + // Set defaults + stamp->query_start = NULL; + stamp->query_end = NULL; + stamp->query_disjoint = NULL; + stamp->cpu_timestamp = 0; + + assert(g_Remotery != NULL); + rmt_error = ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler); + if (rmt_error != RMT_ERROR_NONE) + { + return rmt_error; + } + assert(thread_profiler->d3d11 != NULL); + device = thread_profiler->d3d11->device; + last_error = &thread_profiler->d3d11->last_error; + + // Create start/end timestamp queries + timestamp_desc.Query = D3D11_QUERY_TIMESTAMP; + timestamp_desc.MiscFlags = 0; + *last_error = ID3D11Device_CreateQuery(device, &timestamp_desc, &stamp->query_start); + if (*last_error != S_OK) + return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY; + *last_error = ID3D11Device_CreateQuery(device, &timestamp_desc, &stamp->query_end); + if (*last_error != S_OK) + return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY; + + // Create disjoint query + disjoint_desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + disjoint_desc.MiscFlags = 0; + *last_error = ID3D11Device_CreateQuery(device, &disjoint_desc, &stamp->query_disjoint); + if (*last_error != S_OK) + return RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY; + + return RMT_ERROR_NONE; +} + +static void D3D11Timestamp_Destructor(D3D11Timestamp* stamp) +{ + assert(stamp != NULL); + + // Destroy queries + if (stamp->query_disjoint != NULL) + ID3D11Query_Release(stamp->query_disjoint); + if (stamp->query_end != NULL) + ID3D11Query_Release(stamp->query_end); + if (stamp->query_start != NULL) + ID3D11Query_Release(stamp->query_start); +} + +static void D3D11Timestamp_Begin(D3D11Timestamp* stamp, ID3D11DeviceContext* context) +{ + assert(stamp != NULL); + + // Start of disjoint and first query + stamp->cpu_timestamp = usTimer_Get(&g_Remotery->timer); + ID3D11DeviceContext_Begin(context, (ID3D11Asynchronous*)stamp->query_disjoint); + ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_start); +} + +static void D3D11Timestamp_End(D3D11Timestamp* stamp, ID3D11DeviceContext* context) +{ + assert(stamp != NULL); + + // End of disjoint and second query + ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_end); + ID3D11DeviceContext_End(context, (ID3D11Asynchronous*)stamp->query_disjoint); +} + +static HRESULT D3D11Timestamp_GetData(D3D11Timestamp* stamp, ID3D11Device* device, ID3D11DeviceContext* context, + rmtU64* out_start, rmtU64* out_end, rmtU64* out_first_timestamp, + rmtU64* out_last_resync) +{ + ID3D11Asynchronous* query_start; + ID3D11Asynchronous* query_end; + ID3D11Asynchronous* query_disjoint; + HRESULT result; + + UINT64 start; + UINT64 end; + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; + + assert(stamp != NULL); + query_start = (ID3D11Asynchronous*)stamp->query_start; + query_end = (ID3D11Asynchronous*)stamp->query_end; + query_disjoint = (ID3D11Asynchronous*)stamp->query_disjoint; + + // Check to see if all queries are ready + // If any fail to arrive, wait until later + result = ID3D11DeviceContext_GetData(context, query_start, &start, sizeof(start), D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (result != S_OK) + return result; + result =
ID3D11DeviceContext_GetData(context, query_end, &end, sizeof(end), D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (result != S_OK) + return result; + result = ID3D11DeviceContext_GetData(context, query_disjoint, &disjoint, sizeof(disjoint), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (result != S_OK) + return result; + + if (disjoint.Disjoint == FALSE) + { + double frequency = disjoint.Frequency / 1000000.0; + + // Mark the first timestamp. We may resync if we detect the GPU timestamp is in the + // past (i.e. happened before the CPU command) since it should be impossible. + assert(out_first_timestamp != NULL); + if (*out_first_timestamp == 0 || ((start - *out_first_timestamp) / frequency) < stamp->cpu_timestamp) + { + result = SyncD3D11CpuGpuTimes(device, context, out_first_timestamp, out_last_resync); + if (result != S_OK) + return result; + } + + // Calculate start and end timestamps from the disjoint info + *out_start = (rmtU64)((start - *out_first_timestamp) / frequency); + *out_end = (rmtU64)((end - *out_first_timestamp) / frequency); + } + else + { +#if RMT_D3D11_RESYNC_ON_DISJOINT + result = SyncD3D11CpuGpuTimes(device, context, out_first_timestamp, out_last_resync); + if (result != S_OK) + return result; +#endif + } + + return S_OK; +} + +typedef struct D3D11Sample +{ + // IS-A inheritance relationship + Sample base; + + D3D11Timestamp* timestamp; + +} D3D11Sample; + +static rmtError D3D11Sample_Constructor(D3D11Sample* sample) +{ + assert(sample != NULL); + + // Chain to sample constructor + Sample_Constructor((Sample*)sample); + sample->base.type = RMT_SampleType_D3D11; + rmtTryNew(D3D11Timestamp, sample->timestamp); + + return RMT_ERROR_NONE; +} + +static void D3D11Sample_Destructor(D3D11Sample* sample) +{ + rmtDelete(D3D11Timestamp, sample->timestamp); + Sample_Destructor((Sample*)sample); +} + +RMT_API void _rmt_BindD3D11(void* device, void* context) +{ + if (g_Remotery != NULL) + { + ThreadProfiler* thread_profiler; + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + assert(thread_profiler->d3d11 != NULL); + + assert(device != NULL); + thread_profiler->d3d11->device = (ID3D11Device*)device; + assert(context != NULL); + thread_profiler->d3d11->context = (ID3D11DeviceContext*)context; + } + } +} + +static void UpdateD3D11Frame(ThreadProfiler* thread_profiler); + +RMT_API void _rmt_UnbindD3D11(void) +{ + if (g_Remotery != NULL) + { + ThreadProfiler* thread_profiler; + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + D3D11* d3d11 = thread_profiler->d3d11; + assert(d3d11 != NULL); + + // Stall waiting for the D3D queue to empty into the Remotery queue + while (!rmtMessageQueue_IsEmpty(d3d11->mq_to_d3d11_main)) + UpdateD3D11Frame(thread_profiler); + + // There will be a whole bunch of D3D11 sample trees queued up in the Remotery queue that need releasing + FreePendingSampleTrees(g_Remotery, RMT_SampleType_D3D11, d3d11->flush_samples); + + // Inform sampler to not add any more samples + d3d11->device = NULL; + d3d11->context = NULL; + + // Forcefully delete sample tree on this thread to release time stamps from + // the same thread that created them + Remotery_DeleteSampleTree(g_Remotery, RMT_SampleType_D3D11); + } + } +} + +static rmtError AllocateD3D11SampleTree(SampleTree** d3d_tree) +{ + rmtTryNew(SampleTree, *d3d_tree, sizeof(D3D11Sample), (ObjConstructor)D3D11Sample_Constructor, + (ObjDestructor)D3D11Sample_Destructor); + return RMT_ERROR_NONE; +} + +RMT_API
void _rmt_BeginD3D11Sample(rmtPStr name, rmtU32* hash_cache) +{ + ThreadProfiler* thread_profiler; + D3D11* d3d11; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash; + SampleTree** d3d_tree; + + // Has D3D11 been unbound? + d3d11 = thread_profiler->d3d11; + assert(d3d11 != NULL); + if (d3d11->device == NULL || d3d11->context == NULL) + return; + + name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + + // Create the D3D11 tree on-demand as the tree needs an up-front-created root. + // This is not possible to create on initialisation as a D3D11 binding is not yet available. + d3d_tree = &thread_profiler->sampleTrees[RMT_SampleType_D3D11]; + if (*d3d_tree == NULL) + { + AllocateD3D11SampleTree(d3d_tree); + } + + // Push the sample and activate the timestamp + if (ThreadProfiler_Push(*d3d_tree, name_hash, 0, &sample) == RMT_ERROR_NONE) + { + D3D11Sample* d3d_sample = (D3D11Sample*)sample; + d3d_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer); + D3D11Timestamp_Begin(d3d_sample->timestamp, d3d11->context); + } + } +} + +static rmtBool GetD3D11SampleTimes(Sample* sample, ThreadProfiler* thread_profiler, rmtU64* out_first_timestamp, + rmtU64* out_last_resync) +{ + Sample* child; + + D3D11Sample* d3d_sample = (D3D11Sample*)sample; + + assert(sample != NULL); + if (d3d_sample->timestamp != NULL) + { + HRESULT result; + + D3D11* d3d11 = thread_profiler->d3d11; + assert(d3d11 != NULL); + + assert(out_last_resync != NULL); + +#if (RMT_GPU_CPU_SYNC_SECONDS > 0) + if (*out_last_resync < d3d_sample->timestamp->cpu_timestamp) + { + // Convert from us to seconds. 
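+ // (cpu_timestamp and last_resync both come from usTimer_Get in microseconds, so dividing their difference by 1,000,000 gives whole seconds since the last resync.)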
+ rmtU64 time_diff = (d3d_sample->timestamp->cpu_timestamp - *out_last_resync) / 1000000ULL; + if (time_diff > RMT_GPU_CPU_SYNC_SECONDS) + { + result = SyncD3D11CpuGpuTimes(d3d11->device, d3d11->context, out_first_timestamp, out_last_resync); + if (result != S_OK) + { + d3d11->last_error = result; + return RMT_FALSE; + } + } + } +#endif + + result = D3D11Timestamp_GetData(d3d_sample->timestamp, d3d11->device, d3d11->context, &sample->us_start, + &sample->us_end, out_first_timestamp, out_last_resync); + + if (result != S_OK) + { + d3d11->last_error = result; + return RMT_FALSE; + } + + sample->us_length = sample->us_end - sample->us_start; + } + + // Sum length on the parent to track un-sampled time in the parent + if (sample->parent != NULL) + { + sample->parent->us_sampled_length += sample->us_length; + } + + // Get child sample times + for (child = sample->first_child; child != NULL; child = child->next_sibling) + { + if (!GetD3D11SampleTimes(child, thread_profiler, out_first_timestamp, out_last_resync)) + return RMT_FALSE; + } + + return RMT_TRUE; +} + +static void UpdateD3D11Frame(ThreadProfiler* thread_profiler) +{ + D3D11* d3d11; + + if (g_Remotery == NULL) + return; + + d3d11 = thread_profiler->d3d11; + assert(d3d11 != NULL); + + rmt_BeginCPUSample(rmt_UpdateD3D11Frame, 0); + + // Process all messages in the D3D queue + for (;;) + { + Msg_SampleTree* sample_tree; + Sample* sample; + + Message* message = rmtMessageQueue_PeekNextMessage(d3d11->mq_to_d3d11_main); + if (message == NULL) + break; + + // There's only one valid message type in this queue + assert(message->id == MsgID_SampleTree); + sample_tree = (Msg_SampleTree*)message->payload; + sample = sample_tree->rootSample; + assert(sample->type == RMT_SampleType_D3D11); + + // Retrieve timing of all D3D11 samples + // If they aren't ready leave the message unconsumed, holding up later frames and maintaining order + if (!GetD3D11SampleTimes(sample, thread_profiler, &d3d11->first_timestamp, &d3d11->last_resync)) + break; + + // Pass samples onto the remotery thread for sending to the viewer + QueueSampleTree(g_Remotery->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->threadName, 0, + message->threadProfiler, RMT_FALSE); + rmtMessageQueue_ConsumeNextMessage(d3d11->mq_to_d3d11_main, message); + } + + rmt_EndCPUSample(); +} + +RMT_API void _rmt_EndD3D11Sample(void) +{ + ThreadProfiler* thread_profiler; + D3D11* d3d11; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + D3D11Sample* d3d_sample; + + // Has D3D11 been unbound? 
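+ // (_rmt_UnbindD3D11 clears the device and context, turning any further sampling on this thread into a silent no-op.)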
+ d3d11 = thread_profiler->d3d11; + assert(d3d11 != NULL); + if (d3d11->device == NULL || d3d11->context == NULL) + return; + + // Close the timestamp + d3d_sample = (D3D11Sample*)thread_profiler->sampleTrees[RMT_SampleType_D3D11]->currentParent; + if (d3d_sample->base.recurse_depth > 0) + { + d3d_sample->base.recurse_depth--; + } + else + { + if (d3d_sample->timestamp != NULL) + D3D11Timestamp_End(d3d_sample->timestamp, d3d11->context); + + // Send to the update loop for ready-polling + if (ThreadProfiler_Pop(thread_profiler, d3d11->mq_to_d3d11_main, (Sample*)d3d_sample, 0)) + // Perform ready-polling on popping of the root sample + UpdateD3D11Frame(thread_profiler); + } + } +} + +#endif // RMT_USE_D3D11 + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @D3D12: Direct3D 12 event sampling +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#if RMT_USE_D3D12 + +// As clReflect has no way of disabling C++ compile mode, this forces C interfaces everywhere... +#define CINTERFACE + +#include <d3d12.h> + +typedef struct D3D12ThreadData +{ + rmtU32 lastAllocatedQueryIndex; + + // Sample trees in transit in the message queue for release on shutdown + Buffer* flushSamples; +} D3D12ThreadData; + +static rmtError D3D12ThreadData_Create(D3D12ThreadData** d3d12_thread_data) +{ + assert(d3d12_thread_data != NULL); + + // Allocate space for the D3D12 data + rmtTryMalloc(D3D12ThreadData, *d3d12_thread_data); + + // Set defaults + (*d3d12_thread_data)->lastAllocatedQueryIndex = 0; + (*d3d12_thread_data)->flushSamples = NULL; + + rmtTryNew(Buffer, (*d3d12_thread_data)->flushSamples, 8 * 1024); + + return RMT_ERROR_NONE; +} + +static void D3D12ThreadData_Destructor(D3D12ThreadData* d3d12_thread_data) +{ + assert(d3d12_thread_data != NULL); + rmtDelete(Buffer, d3d12_thread_data->flushSamples); +} + +typedef struct D3D12Sample +{ + // IS-A inheritance relationship + Sample base; + + // Cached bind and command list used to create the sample so that the user doesn't have to pass it + struct D3D12BindImpl* bind; + ID3D12GraphicsCommandList* commandList; + + // Begin/End timestamp indices in the query heap + rmtU32 queryIndex; + +} D3D12Sample; + +static rmtError D3D12Sample_Constructor(D3D12Sample* sample) +{ + assert(sample != NULL); + + // Chain to sample constructor + Sample_Constructor((Sample*)sample); + sample->base.type = RMT_SampleType_D3D12; + sample->bind = NULL; + sample->commandList = NULL; + sample->queryIndex = 0; + + return RMT_ERROR_NONE; +} + +static void D3D12Sample_Destructor(D3D12Sample* sample) +{ + Sample_Destructor((Sample*)sample); +} + +typedef struct D3D12BindImpl +{ + rmtD3D12Bind base; + + // Ring buffer of GPU timestamp destinations for all queries + rmtU32 maxNbQueries; + ID3D12QueryHeap* gpuTimestampRingBuffer; + + // CPU-accessible copy destination for all timestamps + ID3D12Resource* cpuTimestampRingBuffer; + + // Pointers to samples that expect the result of timestamps + D3D12Sample** sampleRingBuffer; + + // Read/write positions of the ring buffer allocator, synchronising access to all the ring buffers at once + // TODO(don): Separate by cache line?
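+ // (Keeping both indices on one cache line risks false sharing between the sampling threads that advance ringBufferWrite and the update path that advances ringBufferRead; padding each to a 64-byte boundary would avoid it.)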
+ rmtAtomicU32 ringBufferRead; + rmtAtomicU32 ringBufferWrite; + + ID3D12Fence* gpuQueryFence; + + + + // Queue to the D3D 12 main update thread + rmtMessageQueue* mqToD3D12Update; + + struct D3D12BindImpl* next; + +} D3D12BindImpl; + +#ifdef IID_PPV_ARGS +#define C_IID_PPV_ARGS(iid, addr) IID_PPV_ARGS(addr) +#else +#define C_IID_PPV_ARGS(iid, addr) &iid, (void**)addr +#endif + +#include <sdkddkver.h> + +static rmtError CreateQueryHeap(D3D12BindImpl* bind, ID3D12Device* d3d_device, ID3D12CommandQueue* d3d_queue, rmtU32 nb_queries) +{ + HRESULT hr; + D3D12_QUERY_HEAP_TYPE query_heap_type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + D3D12_COMMAND_QUEUE_DESC queue_desc; + D3D12_QUERY_HEAP_DESC query_heap_desc; + + // Select the correct query heap type for the copy queue + #if WDK_NTDDI_VERSION >= NTDDI_WIN10_CO + //d3d_queue->lpVtbl->GetDesc(d3d_queue, &queue_desc); + /*if (queue_desc.Type == D3D12_COMMAND_LIST_TYPE_COPY) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 feature_data; + hr = d3d_device->lpVtbl->CheckFeatureSupport(d3d_device, D3D12_FEATURE_D3D12_OPTIONS3, &feature_data, sizeof(feature_data)); + if (hr != S_OK || feature_data.CopyQueueTimestampQueriesSupported == FALSE) + { + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Copy queues on this device do not support timestamps"); + } + + query_heap_type = D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP; + }*/ + #else + if (queue_desc.Type == D3D12_COMMAND_LIST_TYPE_COPY) + { + // On old versions of Windows SDK the D3D C headers incorrectly returned structures + // The ABI is different and C++ expects return structures to be silently passed as parameters + // The newer headers add an extra out parameter to make this explicit + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Your Win10 SDK version is too old to determine if this device supports timestamps on copy queues"); + } + #endif + + // Create the heap for all the queries + ZeroMemory(&query_heap_desc, sizeof(query_heap_desc)); + query_heap_desc.Type = query_heap_type; + query_heap_desc.Count = nb_queries; + hr = d3d_device->lpVtbl->CreateQueryHeap(d3d_device, &query_heap_desc, C_IID_PPV_ARGS(IID_ID3D12QueryHeap, &bind->gpuTimestampRingBuffer)); + if (hr != S_OK) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Failed to create D3D12 Query Heap"); + } + + return RMT_ERROR_NONE; +} + +static rmtError CreateCpuQueries(D3D12BindImpl* bind, ID3D12Device* d3d_device) +{ + D3D12_HEAP_PROPERTIES results_heap_props; + HRESULT hr; + + // We want a readback resource that the GPU can copy to and the CPU can read from + ZeroMemory(&results_heap_props, sizeof(results_heap_props)); + results_heap_props.Type = D3D12_HEAP_TYPE_READBACK; + + // Describe resource dimensions, enough to store a timestamp for each query + D3D12_RESOURCE_DESC results_desc; + ZeroMemory(&results_desc, sizeof(results_desc)); + results_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + results_desc.Width = bind->maxNbQueries * sizeof(rmtU64); + results_desc.Height = 1; + results_desc.DepthOrArraySize = 1; + results_desc.MipLevels = 1; + results_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + results_desc.SampleDesc.Count = 1; + + hr = d3d_device->lpVtbl->CreateCommittedResource(d3d_device, &results_heap_props, D3D12_HEAP_FLAG_NONE, + &results_desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL, + C_IID_PPV_ARGS(IID_ID3D12Resource, &bind->cpuTimestampRingBuffer)); + if (hr != S_OK) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Failed to create D3D12 Query Results Buffer"); + } + + return RMT_ERROR_NONE; +} + +static rmtError
CreateQueryFence(D3D12BindImpl* bind, ID3D12Device* d3d_device) +{ + HRESULT hr = d3d_device->lpVtbl->CreateFence(d3d_device, 0, D3D12_FENCE_FLAG_NONE, C_IID_PPV_ARGS(IID_ID3D12Fence, &bind->gpuQueryFence)); + if (hr != S_OK) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Failed to create D3D12 Query Fence"); + } + + return RMT_ERROR_NONE; +} + +static rmtError CopyD3D12Timestamps(D3D12BindImpl* bind, rmtU32 ring_pos_a, rmtU32 ring_pos_b, double gpu_ticks_to_us, rmtS64 gpu_to_cpu_timestamp_us) +{ + rmtU32 query_index; + D3D12_RANGE map; + rmtU64* cpu_timestamps; + + ID3D12Resource* cpu_timestamp_buffer = (ID3D12Resource*)bind->cpuTimestampRingBuffer; + D3D12Sample** cpu_sample_buffer = bind->sampleRingBuffer; + + // Map the range we're interested in reading + map.Begin = ring_pos_a * sizeof(rmtU64); + map.End = ring_pos_b * sizeof(rmtU64); + if (cpu_timestamp_buffer->lpVtbl->Map(cpu_timestamp_buffer, 0, &map, (void**)&cpu_timestamps) != S_OK) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Failed to Map D3D12 CPU Timestamp Ring Buffer"); + } + + // Copy all timestamps to their expectant samples + for (query_index = ring_pos_a; query_index < ring_pos_b; query_index += 2) + { + rmtU64 us_start = (rmtU64)(cpu_timestamps[query_index] * gpu_ticks_to_us + gpu_to_cpu_timestamp_us); + rmtU64 us_end = (rmtU64)(cpu_timestamps[query_index + 1] * gpu_ticks_to_us + gpu_to_cpu_timestamp_us); + + D3D12Sample* sample = cpu_sample_buffer[query_index >> 1]; + sample->base.us_start = us_start; + Sample_Close(&sample->base, us_end); + sample->base.us_end = us_end; + } + + cpu_timestamp_buffer->lpVtbl->Unmap(cpu_timestamp_buffer, 0, NULL); + + return RMT_ERROR_NONE; +} + +static rmtError D3D12MarkFrame(D3D12BindImpl* bind) +{ + if (bind == NULL) + { + return RMT_ERROR_NONE; + } + + rmtU32 index_mask = bind->maxNbQueries - 1; + rmtU32 current_read_cpu = LoadAcquire(&bind->ringBufferRead); + rmtU32 current_write_cpu = LoadAcquire(&bind->ringBufferWrite); + + // Tell the GPU where the CPU write position is + ID3D12CommandQueue* d3d_queue = (ID3D12CommandQueue*)bind->base.queue; + d3d_queue->lpVtbl->Signal(d3d_queue, bind->gpuQueryFence, current_write_cpu); + + // Has the GPU processed any writes?
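+ // (The Signal above asks the GPU to set the fence to the CPU write position once execution reaches that point in the queue, so GetCompletedValue below reports how far through the query ring buffer the GPU has actually progressed.)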
+ rmtU32 current_write_gpu = (rmtU32)bind->gpuQueryFence->lpVtbl->GetCompletedValue(bind->gpuQueryFence); + if (current_write_gpu > current_read_cpu) + { + rmtU64 gpu_tick_frequency; + double gpu_ticks_to_us; + rmtU64 gpu_timestamp_us; + rmtU64 cpu_timestamp_us; + rmtS64 gpu_to_cpu_timestamp_us; + + // Physical ring buffer positions + rmtU32 ring_pos_a = current_read_cpu & index_mask; + rmtU32 ring_pos_b = current_write_gpu & index_mask; + + // Get current ticks of both CPU and GPU for synchronisation + rmtU64 gpu_timestamp_ticks; + rmtU64 cpu_timestamp_ticks; + if (d3d_queue->lpVtbl->GetClockCalibration(d3d_queue, &gpu_timestamp_ticks, &cpu_timestamp_ticks) != S_OK) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Failed to get D3D12 CPU/GPU Clock Calibration"); + } + + // Convert GPU ticks to microseconds + d3d_queue->lpVtbl->GetTimestampFrequency(d3d_queue, &gpu_tick_frequency); + gpu_ticks_to_us = 1000000.0 / gpu_tick_frequency; + gpu_timestamp_us = (rmtU64)(gpu_timestamp_ticks * gpu_ticks_to_us); + + // Convert CPU ticks to microseconds, offset from the global timer start + cpu_timestamp_us = usTimer_FromRawTicks(&g_Remotery->timer, cpu_timestamp_ticks); + + // And we now have the offset from GPU microseconds to CPU microseconds + gpu_to_cpu_timestamp_us = cpu_timestamp_us - gpu_timestamp_us; + + // Copy resulting timestamps to their samples + // Will have to split the copies into two passes if they cross the ring buffer wrap around + if (ring_pos_b < ring_pos_a) + { + rmtTry(CopyD3D12Timestamps(bind, ring_pos_a, bind->maxNbQueries, gpu_ticks_to_us, gpu_to_cpu_timestamp_us)); + rmtTry(CopyD3D12Timestamps(bind, 0, ring_pos_b, gpu_ticks_to_us, gpu_to_cpu_timestamp_us)); + } + else + { + rmtTry(CopyD3D12Timestamps(bind, ring_pos_a, ring_pos_b, gpu_ticks_to_us, gpu_to_cpu_timestamp_us)); + } + + // Release the ring buffer entries just processed + StoreRelease(&bind->ringBufferRead, current_write_gpu); + } + + // Attempt to empty the queue of complete message trees + Message* message; + while ((message = rmtMessageQueue_PeekNextMessage(bind->mqToD3D12Update))) + { + Msg_SampleTree* msg_sample_tree; + Sample* root_sample; + + // Ensure only D3D12 sample tree messages come through here + assert(message->id == MsgID_SampleTree); + msg_sample_tree = (Msg_SampleTree*)message->payload; + root_sample = msg_sample_tree->rootSample; + assert(root_sample->type == RMT_SampleType_D3D12); + + // If the last-allocated query in this tree has been GPU-processed it's safe to now send the tree to the Remotery thread + if (current_write_gpu > msg_sample_tree->userData) + { + QueueSampleTree(g_Remotery->mq_to_rmt_thread, root_sample, msg_sample_tree->allocator, msg_sample_tree->threadName, + 0, message->threadProfiler, RMT_FALSE); + rmtMessageQueue_ConsumeNextMessage(bind->mqToD3D12Update, message); + } + else + { + break; + } + } + + // Chain to the next bind here so that root calling code doesn't need to know the definition of D3D12BindImpl + rmtTry(D3D12MarkFrame(bind->next)); + + return RMT_ERROR_NONE; +} + +static rmtError SampleD3D12GPUThreadLoop(rmtThread* rmt_thread) +{ + D3D12BindImpl* bind = (D3D12BindImpl*)rmt_thread->param; + + while (rmt_thread->request_exit == RMT_FALSE) + { + msSleep(15); + } + + return RMT_ERROR_NONE; +} + +RMT_API rmtError _rmt_BindD3D12(void* device, void* queue, rmtD3D12Bind** out_bind) +{ + D3D12BindImpl* bind; + ID3D12Device* d3d_device = (ID3D12Device*)device; + ID3D12CommandQueue* d3d_queue = (ID3D12CommandQueue*)queue; + + if (g_Remotery == NULL) + { + return
RMT_ERROR_REMOTERY_NOT_CREATED; + } + + assert(device != NULL); + assert(queue != NULL); + assert(out_bind != NULL); + + // Allocate the bind container + rmtTryMalloc(D3D12BindImpl, bind); + + // Set default state + bind->base.device = device; + bind->base.queue = queue; + bind->maxNbQueries = 32 * 1024; + bind->gpuTimestampRingBuffer = NULL; + bind->cpuTimestampRingBuffer = NULL; + bind->sampleRingBuffer = NULL; + bind->ringBufferRead = 0; + bind->ringBufferWrite = 0; + bind->gpuQueryFence = NULL; + bind->mqToD3D12Update = NULL; + bind->next = NULL; + + // Create the independent ring buffer storage items + // TODO(don): Leave space between start and end to stop invalidating cache lines? + // NOTE(don): ABA impossible due to non-wrapping ring buffer indices + rmtTry(CreateQueryHeap(bind, d3d_device, d3d_queue, bind->maxNbQueries)); + rmtTry(CreateCpuQueries(bind, d3d_device)); + rmtTryMallocArray(D3D12Sample*, bind->sampleRingBuffer, bind->maxNbQueries / 2); + rmtTry(CreateQueryFence(bind, d3d_device)); + + rmtTryNew(rmtMessageQueue, bind->mqToD3D12Update, g_Settings.messageQueueSizeInBytes); + + // Add to the global linked list of binds + { + mtxLock(&g_Remotery->d3d12BindsMutex); + bind->next = g_Remotery->d3d12Binds; + g_Remotery->d3d12Binds = bind; + mtxUnlock(&g_Remotery->d3d12BindsMutex); + } + + *out_bind = &bind->base; + + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_UnbindD3D12(rmtD3D12Bind* bind) +{ + D3D12BindImpl* d3d_bind = (D3D12BindImpl*)bind; + + assert(bind != NULL); + + // Remove from the linked list + { + mtxLock(&g_Remotery->d3d12BindsMutex); + D3D12BindImpl* cur = g_Remotery->d3d12Binds; + D3D12BindImpl* prev = NULL; + for ( ; cur != NULL; prev = cur, cur = cur->next) + { + if (cur == d3d_bind) + { + if (prev != NULL) + { + prev->next = cur->next; + } + else + { + g_Remotery->d3d12Binds = cur->next; + } + + break; + } + } + mtxUnlock(&g_Remotery->d3d12BindsMutex); + } + + if (d3d_bind->gpuQueryFence != NULL) + { + d3d_bind->gpuQueryFence->lpVtbl->Release(d3d_bind->gpuQueryFence); + } + + rmtFree(d3d_bind->sampleRingBuffer); + + if (d3d_bind->cpuTimestampRingBuffer != NULL) + { + d3d_bind->cpuTimestampRingBuffer->lpVtbl->Release(d3d_bind->cpuTimestampRingBuffer); + } + + if (d3d_bind->gpuTimestampRingBuffer != NULL) + { + d3d_bind->gpuTimestampRingBuffer->lpVtbl->Release(d3d_bind->gpuTimestampRingBuffer); + } +} + +static rmtError AllocateD3D12SampleTree(SampleTree** d3d_tree) +{ + rmtTryNew(SampleTree, *d3d_tree, sizeof(D3D12Sample), (ObjConstructor)D3D12Sample_Constructor, + (ObjDestructor)D3D12Sample_Destructor); + return RMT_ERROR_NONE; +} + +static rmtError AllocD3D12QueryPair(D3D12BindImpl* d3d_bind, rmtAtomicU32* out_allocation_index) +{ + // Check for overflow against a tail which is only ever written by one thread + rmtU32 read = LoadAcquire(&d3d_bind->ringBufferRead); + rmtU32 write = LoadAcquire(&d3d_bind->ringBufferWrite); + rmtU32 nb_queries = (write - read); + rmtU32 queries_left = d3d_bind->maxNbQueries - nb_queries; + if (queries_left < 2) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "D3D12 query ring buffer overflow"); + } + + *out_allocation_index = AtomicAddU32(&d3d_bind->ringBufferWrite, 2); + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_BeginD3D12Sample(rmtD3D12Bind* bind, void* command_list, rmtPStr name, rmtU32* hash_cache) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL || bind == NULL) + return; + + assert(command_list != NULL); + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler)
== RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash; + SampleTree** d3d_tree; + + name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + + // Create the D3D12 tree on-demand as the tree needs an up-front-created root. + // This is not possible to create on initialisation as a D3D12 binding is not yet available. + d3d_tree = &thread_profiler->sampleTrees[RMT_SampleType_D3D12]; + if (*d3d_tree == NULL) + { + AllocateD3D12SampleTree(d3d_tree); + } + + // Push the sample and activate the timestamp + if (ThreadProfiler_Push(*d3d_tree, name_hash, 0, &sample) == RMT_ERROR_NONE) + { + rmtError error; + + D3D12BindImpl* d3d_bind = (D3D12BindImpl*)bind; + ID3D12GraphicsCommandList* d3d_command_list = (ID3D12GraphicsCommandList*)command_list; + + D3D12Sample* d3d_sample = (D3D12Sample*)sample; + d3d_sample->bind = d3d_bind; + d3d_sample->commandList = d3d_command_list; + d3d_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer); + + error = AllocD3D12QueryPair(d3d_bind, &d3d_sample->queryIndex); + if (error == RMT_ERROR_NONE) + { + rmtU32 physical_query_index = d3d_sample->queryIndex & (d3d_bind->maxNbQueries - 1); + d3d_command_list->lpVtbl->EndQuery(d3d_command_list, d3d_bind->gpuTimestampRingBuffer, D3D12_QUERY_TYPE_TIMESTAMP, physical_query_index); + + // Track which D3D sample expects the timestamp results + d3d_bind->sampleRingBuffer[physical_query_index / 2] = d3d_sample; + + // Keep track of the last allocated query so we can check when the GPU has finished with them all + thread_profiler->d3d12ThreadData->lastAllocatedQueryIndex = d3d_sample->queryIndex; + } + else + { + // SET QUERY INDEX TO INVALID so that pop doesn't release it + } + } + } +} + +RMT_API void _rmt_EndD3D12Sample() +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + D3D12ThreadData* d3d_thread_data = thread_profiler->d3d12ThreadData; + D3D12Sample* d3d_sample; + + // Sample tree isn't there if D3D12 hasn't been initialised + SampleTree* d3d_tree = thread_profiler->sampleTrees[RMT_SampleType_D3D12]; + if (d3d_tree == NULL) + { + return; + } + + // Close the timestamp + d3d_sample = (D3D12Sample*)d3d_tree->currentParent; + if (d3d_sample->base.recurse_depth > 0) + { + d3d_sample->base.recurse_depth--; + } + else + { + // Issue the timestamp query for the end of the sample + D3D12BindImpl* d3d_bind = d3d_sample->bind; + ID3D12GraphicsCommandList* d3d_command_list = d3d_sample->commandList; + rmtU32 query_index = d3d_sample->queryIndex & (d3d_bind->maxNbQueries - 1); + d3d_command_list->lpVtbl->EndQuery(d3d_command_list, d3d_bind->gpuTimestampRingBuffer, D3D12_QUERY_TYPE_TIMESTAMP, + query_index + 1); + + // Immediately schedule resolve of the timestamps to CPU-visible memory + d3d_command_list->lpVtbl->ResolveQueryData(d3d_command_list, d3d_bind->gpuTimestampRingBuffer, + D3D12_QUERY_TYPE_TIMESTAMP, query_index, 2, + d3d_bind->cpuTimestampRingBuffer, query_index * sizeof(rmtU64)); + + if (ThreadProfiler_Pop(thread_profiler, d3d_bind->mqToD3D12Update, (Sample*)d3d_sample, + d3d_thread_data->lastAllocatedQueryIndex)) + { + } + } + } +} + +#endif // RMT_USE_D3D12 + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +@OpenGL: 
OpenGL event sampling +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +#if RMT_USE_OPENGL + +#ifndef APIENTRY +#if defined(__MINGW32__) || defined(__CYGWIN__) +#define APIENTRY __stdcall +#elif (defined(_MSC_VER) && (_MSC_VER >= 800)) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__) +#define APIENTRY __stdcall +#else +#define APIENTRY +#endif +#endif + +#ifndef GLAPI +#if defined(__MINGW32__) || defined(__CYGWIN__) +#define GLAPI extern +#elif defined(_WIN32) +#define GLAPI WINGDIAPI +#else +#define GLAPI extern +#endif +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY APIENTRY +#endif + +typedef rmtU32 GLenum; +typedef rmtU32 GLuint; +typedef rmtS32 GLint; +typedef rmtS32 GLsizei; +typedef rmtU64 GLuint64; +typedef rmtS64 GLint64; +typedef unsigned char GLubyte; + +typedef GLenum(GLAPIENTRY* PFNGLGETERRORPROC)(void); +typedef void(GLAPIENTRY* PFNGLGENQUERIESPROC)(GLsizei n, GLuint* ids); +typedef void(GLAPIENTRY* PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint* ids); +typedef void(GLAPIENTRY* PFNGLBEGINQUERYPROC)(GLenum target, GLuint id); +typedef void(GLAPIENTRY* PFNGLENDQUERYPROC)(GLenum target); +typedef void(GLAPIENTRY* PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint* params); +typedef void(GLAPIENTRY* PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint* params); +typedef void(GLAPIENTRY* PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64* params); +typedef void(GLAPIENTRY* PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64* params); +typedef void(GLAPIENTRY* PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target); +typedef void(GLAPIENTRY* PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64* data); +typedef void(GLAPIENTRY* PFNGLFINISHPROC)(void); + +#define GL_NO_ERROR 0 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_TIME_ELAPSED 0x88BF +#define GL_TIMESTAMP 0x8E28 + +#define RMT_GL_GET_FUN(x) \ + assert(g_Remotery->opengl->x != NULL); \ + g_Remotery->opengl->x + +#define rmtglGenQueries RMT_GL_GET_FUN(__glGenQueries) +#define rmtglDeleteQueries RMT_GL_GET_FUN(__glDeleteQueries) +#define rmtglBeginQuery RMT_GL_GET_FUN(__glBeginQuery) +#define rmtglEndQuery RMT_GL_GET_FUN(__glEndQuery) +#define rmtglGetQueryObjectiv RMT_GL_GET_FUN(__glGetQueryObjectiv) +#define rmtglGetQueryObjectuiv RMT_GL_GET_FUN(__glGetQueryObjectuiv) +#define rmtglGetQueryObjecti64v RMT_GL_GET_FUN(__glGetQueryObjecti64v) +#define rmtglGetQueryObjectui64v RMT_GL_GET_FUN(__glGetQueryObjectui64v) +#define rmtglQueryCounter RMT_GL_GET_FUN(__glQueryCounter) +#define rmtglGetInteger64v RMT_GL_GET_FUN(__glGetInteger64v) +#define rmtglFinish RMT_GL_GET_FUN(__glFinish) + +struct OpenGL_t +{ + // Handle to the OS OpenGL DLL + void* dll_handle; + + PFNGLGETERRORPROC __glGetError; + PFNGLGENQUERIESPROC __glGenQueries; + PFNGLDELETEQUERIESPROC __glDeleteQueries; + PFNGLBEGINQUERYPROC __glBeginQuery; + PFNGLENDQUERYPROC __glEndQuery; + PFNGLGETQUERYOBJECTIVPROC __glGetQueryObjectiv; + PFNGLGETQUERYOBJECTUIVPROC __glGetQueryObjectuiv; + PFNGLGETQUERYOBJECTI64VPROC __glGetQueryObjecti64v; + PFNGLGETQUERYOBJECTUI64VPROC __glGetQueryObjectui64v; + PFNGLQUERYCOUNTERPROC __glQueryCounter; + PFNGLGETINTEGER64VPROC __glGetInteger64v; + PFNGLFINISHPROC __glFinish; + + // Queue to the OpenGL main update thread + // Given that BeginSample/EndSample need to be 
called from the same thread that does the update, there + // is really no need for this to be a thread-safe queue. I'm using it for its convenience. + rmtMessageQueue* mq_to_opengl_main; + + // Mark the first time so that remaining timestamps are offset from this + rmtU64 first_timestamp; + // Last time in us (CPU time, via usTimer_Get) since we last resync'ed CPU & GPU + rmtU64 last_resync; + + // Sample trees in transit in the message queue for release on shutdown + Buffer* flush_samples; +}; + +static GLenum rmtglGetError(void) +{ + if (g_Remotery != NULL) + { + assert(g_Remotery->opengl != NULL); + if (g_Remotery->opengl->__glGetError != NULL) + return g_Remotery->opengl->__glGetError(); + } + + return (GLenum)0; +} + +#ifdef RMT_PLATFORM_LINUX +#ifdef __cplusplus +extern "C" void* glXGetProcAddressARB(const GLubyte*); +#else +extern void* glXGetProcAddressARB(const GLubyte*); +#endif +#endif + +static ProcReturnType rmtglGetProcAddress(OpenGL* opengl, const char* symbol) +{ +#if defined(RMT_PLATFORM_WINDOWS) + { + // Get OpenGL extension-loading function for each call + typedef ProcReturnType(WINAPI * wglGetProcAddressFn)(LPCSTR); + assert(opengl != NULL); + { + wglGetProcAddressFn wglGetProcAddress = + (wglGetProcAddressFn)rmtGetProcAddress(opengl->dll_handle, "wglGetProcAddress"); + if (wglGetProcAddress != NULL) + return wglGetProcAddress(symbol); + } + } + +#elif defined(RMT_PLATFORM_MACOS) && !defined(GLEW_APPLE_GLX) + + return rmtGetProcAddress(opengl->dll_handle, symbol); + +#elif defined(RMT_PLATFORM_LINUX) + + return glXGetProcAddressARB((const GLubyte*)symbol); + +#endif + + return NULL; +} + +static rmtError OpenGL_Create(OpenGL** opengl) +{ + assert(opengl != NULL); + + rmtTryMalloc(OpenGL, *opengl); + + (*opengl)->dll_handle = NULL; + + (*opengl)->__glGetError = NULL; + (*opengl)->__glGenQueries = NULL; + (*opengl)->__glDeleteQueries = NULL; + (*opengl)->__glBeginQuery = NULL; + (*opengl)->__glEndQuery = NULL; + (*opengl)->__glGetQueryObjectiv = NULL; + (*opengl)->__glGetQueryObjectuiv = NULL; + (*opengl)->__glGetQueryObjecti64v = NULL; + (*opengl)->__glGetQueryObjectui64v = NULL; + (*opengl)->__glQueryCounter = NULL; + (*opengl)->__glGetInteger64v = NULL; + (*opengl)->__glFinish = NULL; + + (*opengl)->mq_to_opengl_main = NULL; + (*opengl)->first_timestamp = 0; + (*opengl)->last_resync = 0; + (*opengl)->flush_samples = NULL; + + rmtTryNew(Buffer, (*opengl)->flush_samples, 8 * 1024); + rmtTryNew(rmtMessageQueue, (*opengl)->mq_to_opengl_main, g_Settings.messageQueueSizeInBytes); + + return RMT_ERROR_NONE; +} + +static void OpenGL_Destructor(OpenGL* opengl) +{ + assert(opengl != NULL); + rmtDelete(rmtMessageQueue, opengl->mq_to_opengl_main); + rmtDelete(Buffer, opengl->flush_samples); +} + +static void SyncOpenGLCpuGpuTimes(rmtU64* out_first_timestamp, rmtU64* out_last_resync) +{ + rmtU64 cpu_time_start = 0; + rmtU64 cpu_time_stop = 0; + rmtU64 average_half_RTT = 0; // RTT = Roundtrip Time.
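+ // (Half of the CPU-measured roundtrip approximates the one-way latency of the timestamp read; iterating and averaging below damps scheduler and driver noise.)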
+ GLint64 gpu_base = 0; + int i; + + rmtglFinish(); + + for (i = 0; i < RMT_GPU_CPU_SYNC_NUM_ITERATIONS; ++i) + { + rmtU64 half_RTT; + + rmtglFinish(); + cpu_time_start = usTimer_Get(&g_Remotery->timer); + rmtglGetInteger64v(GL_TIMESTAMP, &gpu_base); + cpu_time_stop = usTimer_Get(&g_Remotery->timer); + // Average the time it takes a roundtrip from CPU to GPU + // while doing nothing other than getting timestamps + half_RTT = (cpu_time_stop - cpu_time_start) >> 1ULL; + if (i == 0) + average_half_RTT = half_RTT; + else + average_half_RTT = (average_half_RTT + half_RTT) >> 1ULL; + } + + // All GPU times are offset from gpu_base, and then taken to + // the same relative origin CPU timestamps are based on. + // CPU is in us, we must translate it to ns. + *out_first_timestamp = (rmtU64)(gpu_base) - (cpu_time_start + average_half_RTT) * 1000ULL; + *out_last_resync = cpu_time_stop; +} + +typedef struct OpenGLTimestamp +{ + // Inherit so that timestamps can be quickly allocated + ObjectLink Link; + + // Pair of timestamp queries that wrap the sample + GLuint queries[2]; + rmtU64 cpu_timestamp; +} OpenGLTimestamp; + +static rmtError OpenGLTimestamp_Constructor(OpenGLTimestamp* stamp) +{ + GLenum error; + + assert(stamp != NULL); + + ObjectLink_Constructor((ObjectLink*)stamp); + + // Set defaults + stamp->queries[0] = stamp->queries[1] = 0; + stamp->cpu_timestamp = 0; + + // Empty the error queue before using it for glGenQueries + while ((error = rmtglGetError()) != GL_NO_ERROR) + ; + + // Create start/end timestamp queries + assert(g_Remotery != NULL); + rmtglGenQueries(2, stamp->queries); + error = rmtglGetError(); + if (error != GL_NO_ERROR) + return RMT_ERROR_OPENGL_ERROR; + + return RMT_ERROR_NONE; +} + +static void OpenGLTimestamp_Destructor(OpenGLTimestamp* stamp) +{ + assert(stamp != NULL); + + // Destroy queries + if (stamp->queries[0] != 0) + rmtglDeleteQueries(2, stamp->queries); +} + +static void OpenGLTimestamp_Begin(OpenGLTimestamp* stamp) +{ + assert(stamp != NULL); + + // First query + assert(g_Remotery != NULL); + stamp->cpu_timestamp = usTimer_Get(&g_Remotery->timer); + rmtglQueryCounter(stamp->queries[0], GL_TIMESTAMP); +} + +static void OpenGLTimestamp_End(OpenGLTimestamp* stamp) +{ + assert(stamp != NULL); + + // Second query + assert(g_Remotery != NULL); + rmtglQueryCounter(stamp->queries[1], GL_TIMESTAMP); +} + +static rmtBool OpenGLTimestamp_GetData(OpenGLTimestamp* stamp, rmtU64* out_start, rmtU64* out_end, + rmtU64* out_first_timestamp, rmtU64* out_last_resync) +{ + GLuint64 start = 0, end = 0; + GLint startAvailable = 0, endAvailable = 0; + + assert(g_Remotery != NULL); + + assert(stamp != NULL); + assert(stamp->queries[0] != 0 && stamp->queries[1] != 0); + + // Check to see if all queries are ready + // If any fail to arrive, wait until later + rmtglGetQueryObjectiv(stamp->queries[0], GL_QUERY_RESULT_AVAILABLE, &startAvailable); + if (!startAvailable) + return RMT_FALSE; + rmtglGetQueryObjectiv(stamp->queries[1], GL_QUERY_RESULT_AVAILABLE, &endAvailable); + if (!endAvailable) + return RMT_FALSE; + + rmtglGetQueryObjectui64v(stamp->queries[0], GL_QUERY_RESULT, &start); + rmtglGetQueryObjectui64v(stamp->queries[1], GL_QUERY_RESULT, &end); + + // Mark the first timestamp. We may resync if we detect the GPU timestamp is in the + // past (i.e. happened before the CPU command) since it should be impossible. 
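+ // (A GPU start time earlier than the CPU time at which its query was issued can only mean the CPU/GPU offset has drifted, so a full resync is forced.)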
+ assert(out_first_timestamp != NULL); + if (*out_first_timestamp == 0 || ((start - *out_first_timestamp) / 1000ULL) < stamp->cpu_timestamp) + SyncOpenGLCpuGpuTimes(out_first_timestamp, out_last_resync); + + // Calculate start and end timestamps (we want us, the queries give us ns) + *out_start = (rmtU64)(start - *out_first_timestamp) / 1000ULL; + *out_end = (rmtU64)(end - *out_first_timestamp) / 1000ULL; + + return RMT_TRUE; +} + +typedef struct OpenGLSample +{ + // IS-A inheritance relationship + Sample base; + + OpenGLTimestamp* timestamp; + +} OpenGLSample; + +static rmtError OpenGLSample_Constructor(OpenGLSample* sample) +{ + assert(sample != NULL); + + // Chain to sample constructor + Sample_Constructor((Sample*)sample); + sample->base.type = RMT_SampleType_OpenGL; + rmtTryNew(OpenGLTimestamp, sample->timestamp); + + return RMT_ERROR_NONE; +} + +static void OpenGLSample_Destructor(OpenGLSample* sample) +{ + rmtDelete(OpenGLTimestamp, sample->timestamp); + Sample_Destructor((Sample*)sample); +} + +RMT_API void _rmt_BindOpenGL() +{ + if (g_Remotery != NULL) + { + OpenGL* opengl = g_Remotery->opengl; + assert(opengl != NULL); + +#if defined(RMT_PLATFORM_WINDOWS) + opengl->dll_handle = rmtLoadLibrary("opengl32.dll"); +#elif defined(RMT_PLATFORM_MACOS) + opengl->dll_handle = rmtLoadLibrary("/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL"); +#elif defined(RMT_PLATFORM_LINUX) + opengl->dll_handle = rmtLoadLibrary("libGL.so"); +#endif + + opengl->__glGetError = (PFNGLGETERRORPROC)rmtGetProcAddress(opengl->dll_handle, "glGetError"); + opengl->__glGenQueries = (PFNGLGENQUERIESPROC)rmtglGetProcAddress(opengl, "glGenQueries"); + opengl->__glDeleteQueries = (PFNGLDELETEQUERIESPROC)rmtglGetProcAddress(opengl, "glDeleteQueries"); + opengl->__glBeginQuery = (PFNGLBEGINQUERYPROC)rmtglGetProcAddress(opengl, "glBeginQuery"); + opengl->__glEndQuery = (PFNGLENDQUERYPROC)rmtglGetProcAddress(opengl, "glEndQuery"); + opengl->__glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)rmtglGetProcAddress(opengl, "glGetQueryObjectiv"); + opengl->__glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC)rmtglGetProcAddress(opengl, "glGetQueryObjectuiv"); + opengl->__glGetQueryObjecti64v = + (PFNGLGETQUERYOBJECTI64VPROC)rmtglGetProcAddress(opengl, "glGetQueryObjecti64v"); + opengl->__glGetQueryObjectui64v = + (PFNGLGETQUERYOBJECTUI64VPROC)rmtglGetProcAddress(opengl, "glGetQueryObjectui64v"); + opengl->__glQueryCounter = (PFNGLQUERYCOUNTERPROC)rmtglGetProcAddress(opengl, "glQueryCounter"); + opengl->__glGetInteger64v = (PFNGLGETINTEGER64VPROC)rmtglGetProcAddress(opengl, "glGetInteger64v"); + opengl->__glFinish = (PFNGLFINISHPROC)rmtGetProcAddress(opengl->dll_handle, "glFinish"); + } +} + +static void UpdateOpenGLFrame(void); + +RMT_API void _rmt_UnbindOpenGL(void) +{ + if (g_Remotery != NULL) + { + OpenGL* opengl = g_Remotery->opengl; + assert(opengl != NULL); + + // Stall waiting for the OpenGL queue to empty into the Remotery queue + while (!rmtMessageQueue_IsEmpty(opengl->mq_to_opengl_main)) + UpdateOpenGLFrame(); + + // There will be a whole bunch of OpenGL sample trees queued up in the Remotery queue that need releasing + FreePendingSampleTrees(g_Remotery, RMT_SampleType_OpenGL, opengl->flush_samples); + + // Forcefully delete sample tree on this thread to release time stamps from + // the same thread that created them + Remotery_DeleteSampleTree(g_Remotery, RMT_SampleType_OpenGL); + + // Release reference to the OpenGL DLL + if (opengl->dll_handle != NULL) + { + rmtFreeLibrary(opengl->dll_handle); +
opengl->dll_handle = NULL; + } + } +} + +static rmtError AllocateOpenGLSampleTree(SampleTree** ogl_tree) +{ + rmtTryNew(SampleTree, *ogl_tree, sizeof(OpenGLSample), (ObjConstructor)OpenGLSample_Constructor, + (ObjDestructor)OpenGLSample_Destructor); + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_BeginOpenGLSample(rmtPStr name, rmtU32* hash_cache) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + + // Create the OpenGL tree on-demand as the tree needs an up-front-created root. + // This is not possible to create on initialisation as an OpenGL binding is not yet available. + SampleTree** ogl_tree = &thread_profiler->sampleTrees[RMT_SampleType_OpenGL]; + if (*ogl_tree == NULL) + { + AllocateOpenGLSampleTree(ogl_tree); + } + + // Push the sample and activate the timestamp + if (ThreadProfiler_Push(*ogl_tree, name_hash, 0, &sample) == RMT_ERROR_NONE) + { + OpenGLSample* ogl_sample = (OpenGLSample*)sample; + ogl_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer); + OpenGLTimestamp_Begin(ogl_sample->timestamp); + } + } +} + +static rmtBool GetOpenGLSampleTimes(Sample* sample, rmtU64* out_first_timestamp, rmtU64* out_last_resync) +{ + Sample* child; + + OpenGLSample* ogl_sample = (OpenGLSample*)sample; + + assert(sample != NULL); + if (ogl_sample->timestamp != NULL) + { + assert(out_last_resync != NULL); +#if (RMT_GPU_CPU_SYNC_SECONDS > 0) + if (*out_last_resync < ogl_sample->timestamp->cpu_timestamp) + { + // Convert from us to seconds. + rmtU64 time_diff = (ogl_sample->timestamp->cpu_timestamp - *out_last_resync) / 1000000ULL; + if (time_diff > RMT_GPU_CPU_SYNC_SECONDS) + SyncOpenGLCpuGpuTimes(out_first_timestamp, out_last_resync); + } +#endif + + if (!OpenGLTimestamp_GetData(ogl_sample->timestamp, &sample->us_start, &sample->us_end, out_first_timestamp, + out_last_resync)) + return RMT_FALSE; + + sample->us_length = sample->us_end - sample->us_start; + } + + // Get child sample times + for (child = sample->first_child; child != NULL; child = child->next_sibling) + { + if (!GetOpenGLSampleTimes(child, out_first_timestamp, out_last_resync)) + return RMT_FALSE; + } + + return RMT_TRUE; +} + +static void UpdateOpenGLFrame(void) +{ + OpenGL* opengl; + + if (g_Remotery == NULL) + return; + + opengl = g_Remotery->opengl; + assert(opengl != NULL); + + rmt_BeginCPUSample(rmt_UpdateOpenGLFrame, 0); + + // Process all messages in the OpenGL queue + while (1) + { + Msg_SampleTree* sample_tree; + Sample* sample; + + Message* message = rmtMessageQueue_PeekNextMessage(opengl->mq_to_opengl_main); + if (message == NULL) + break; + + // There's only one valid message type in this queue + assert(message->id == MsgID_SampleTree); + sample_tree = (Msg_SampleTree*)message->payload; + sample = sample_tree->rootSample; + assert(sample->type == RMT_SampleType_OpenGL); + + // Retrieve timing of all OpenGL samples + // If they aren't ready leave the message unconsumed, holding up later frames and maintaining order + if (!GetOpenGLSampleTimes(sample, &opengl->first_timestamp, &opengl->last_resync)) + break; + + // Pass samples onto the remotery thread for sending to the viewer + QueueSampleTree(g_Remotery->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->threadName, 0, + message->threadProfiler, RMT_FALSE); +
rmtMessageQueue_ConsumeNextMessage(opengl->mq_to_opengl_main, message); + } + + rmt_EndCPUSample(); +} + +RMT_API void _rmt_EndOpenGLSample(void) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + // Close the timestamp + OpenGLSample* ogl_sample = (OpenGLSample*)thread_profiler->sampleTrees[RMT_SampleType_OpenGL]->currentParent; + if (ogl_sample->base.recurse_depth > 0) + { + ogl_sample->base.recurse_depth--; + } + else + { + if (ogl_sample->timestamp != NULL) + OpenGLTimestamp_End(ogl_sample->timestamp); + + // Send to the update loop for ready-polling + if (ThreadProfiler_Pop(thread_profiler, g_Remotery->opengl->mq_to_opengl_main, (Sample*)ogl_sample, 0)) + // Perform ready-polling on popping of the root sample + UpdateOpenGLFrame(); + } + } +} + +#endif // RMT_USE_OPENGL + +/* + ------------------------------------------------------------------------------------------------------------------------ + ------------------------------------------------------------------------------------------------------------------------ + @Metal: Metal event sampling + ------------------------------------------------------------------------------------------------------------------------ + ------------------------------------------------------------------------------------------------------------------------ + */ + +#if RMT_USE_METAL + +struct Metal_t +{ + // Queue to the Metal main update thread + // Given that BeginSample/EndSample need to be called from the same thread that does the update, there + // is really no need for this to be a thread-safe queue. I'm using it for its convenience. + rmtMessageQueue* mq_to_metal_main; +}; + +static rmtError Metal_Create(Metal** metal) +{ + assert(metal != NULL); + + rmtTryMalloc(Metal, *metal); + + (*metal)->mq_to_metal_main = NULL; + + rmtTryNew(rmtMessageQueue, (*metal)->mq_to_metal_main, g_Settings.messageQueueSizeInBytes); + + return RMT_ERROR_NONE; +} + +static void Metal_Destructor(Metal* metal) +{ + assert(metal != NULL); + rmtDelete(rmtMessageQueue, metal->mq_to_metal_main); +} + +typedef struct MetalTimestamp +{ + // Inherit so that timestamps can be quickly allocated + ObjectLink Link; + + // Output from GPU callbacks + rmtU64 start; + rmtU64 end; + rmtBool ready; +} MetalTimestamp; + +static rmtError MetalTimestamp_Constructor(MetalTimestamp* stamp) +{ + assert(stamp != NULL); + + ObjectLink_Constructor((ObjectLink*)stamp); + + // Set defaults + stamp->start = 0; + stamp->end = 0; + stamp->ready = RMT_FALSE; + + return RMT_ERROR_NONE; +} + +static void MetalTimestamp_Destructor(MetalTimestamp* stamp) +{ + assert(stamp != NULL); +} + +rmtU64 rmtMetal_usGetTime() +{ + // Share the CPU timer for auto-sync + assert(g_Remotery != NULL); + return usTimer_Get(&g_Remotery->timer); +} + +void rmtMetal_MeasureCommandBuffer(unsigned long long* out_start, unsigned long long* out_end, unsigned int* out_ready); + +static void MetalTimestamp_Begin(MetalTimestamp* stamp) +{ + assert(stamp != NULL); + stamp->ready = RMT_FALSE; + + // Metal can currently only issue callbacks at the command buffer level + // So for now measure execution of the entire command buffer + rmtMetal_MeasureCommandBuffer(&stamp->start, &stamp->end, &stamp->ready); +} + +static void MetalTimestamp_End(MetalTimestamp* stamp) +{ + assert(stamp != NULL); + + // As Metal can currently only measure entire command buffers, this function is a no-op + // as 
the completed handler was already issued in Begin +} + +static rmtBool MetalTimestamp_GetData(MetalTimestamp* stamp, rmtU64* out_start, rmtU64* out_end) +{ + assert(g_Remotery != NULL); + assert(stamp != NULL); + + // GPU writes ready flag when complete handler is called + if (stamp->ready == RMT_FALSE) + return RMT_FALSE; + + *out_start = stamp->start; + *out_end = stamp->end; + + return RMT_TRUE; +} + +typedef struct MetalSample +{ + // IS-A inheritance relationship + Sample base; + + MetalTimestamp* timestamp; + +} MetalSample; + +static rmtError MetalSample_Constructor(MetalSample* sample) +{ + assert(sample != NULL); + + // Chain to sample constructor + Sample_Constructor((Sample*)sample); + sample->base.type = RMT_SampleType_Metal; + rmtTryNew(MetalTimestamp, sample->timestamp); + + return RMT_ERROR_NONE; +} + +static void MetalSample_Destructor(MetalSample* sample) +{ + rmtDelete(MetalTimestamp, sample->timestamp); + Sample_Destructor((Sample*)sample); +} + +static void UpdateOpenGLFrame(void); + +/*RMT_API void _rmt_UnbindMetal(void) +{ + if (g_Remotery != NULL) + { + Metal* metal = g_Remotery->metal; + assert(metal != NULL); + + // Stall waiting for the Metal queue to empty into the Remotery queue + while (!rmtMessageQueue_IsEmpty(metal->mq_to_metal_main)) + UpdateMetalFrame(); + + // Forcefully delete sample tree on this thread to release time stamps from + // the same thread that created them + Remotery_BlockingDeleteSampleTree(g_Remotery, RMT_SampleType_Metal); + } +}*/ + +RMT_API rmtError _rmt_BeginMetalSample(rmtPStr name, rmtU32* hash_cache) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + { + return RMT_ERROR_UNKNOWN; + } + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + + // Create the Metal tree on-demand as the tree needs an up-front-created root. + // This is not possible to create on initialisation as a Metal binding is not yet available. 
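/* Editor's note -- an illustrative usage sketch, not part of the patch: from the caller's
   side this path is driven by the macros declared in Remotery.h. A typical Metal frame
   might look like:

       rmt_BindMetal(command_buffer);     // rebind whenever the command buffer changes
       rmt_BeginMetalSample(DrawScene);
       // ... encode work into the command buffer ...
       rmt_EndMetalSample();

   Since Metal timing is currently per command buffer, the sample brackets the whole buffer
   rather than just the encoded range. */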
+ SampleTree** metal_tree = &thread_profiler->sampleTrees[RMT_SampleType_Metal]; + if (*metal_tree == NULL) + { + rmtTryNew(SampleTree, *metal_tree, sizeof(MetalSample), (ObjConstructor)MetalSample_Constructor, + (ObjDestructor)MetalSample_Destructor); + } + + // Push the sample and activate the timestamp + if (ThreadProfiler_Push(*metal_tree, name_hash, 0, &sample) == RMT_ERROR_NONE) + { + MetalSample* metal_sample = (MetalSample*)sample; + metal_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer); + MetalTimestamp_Begin(metal_sample->timestamp); + } + } + + return RMT_ERROR_NONE; +} + +static rmtBool GetMetalSampleTimes(Sample* sample) +{ + Sample* child; + + MetalSample* metal_sample = (MetalSample*)sample; + + assert(sample != NULL); + if (metal_sample->timestamp != NULL) + { + if (!MetalTimestamp_GetData(metal_sample->timestamp, &sample->us_start, &sample->us_end)) + return RMT_FALSE; + + sample->us_length = sample->us_end - sample->us_start; + } + + // Get child sample times + for (child = sample->first_child; child != NULL; child = child->next_sibling) + { + if (!GetMetalSampleTimes(child)) + return RMT_FALSE; + } + + return RMT_TRUE; +} + +static void UpdateMetalFrame(void) +{ + Metal* metal; + + if (g_Remotery == NULL) + return; + + metal = g_Remotery->metal; + assert(metal != NULL); + + rmt_BeginCPUSample(rmt_UpdateMetalFrame, 0); + + // Process all messages in the Metal queue + while (1) + { + Msg_SampleTree* sample_tree; + Sample* sample; + + Message* message = rmtMessageQueue_PeekNextMessage(metal->mq_to_metal_main); + if (message == NULL) + break; + + // There's only one valid message type in this queue + assert(message->id == MsgID_SampleTree); + sample_tree = (Msg_SampleTree*)message->payload; + sample = sample_tree->rootSample; + assert(sample->type == RMT_SampleType_Metal); + + // Retrieve timing of all Metal samples + // If they aren't ready leave the message unconsumed, holding up later frames and maintaining order + if (!GetMetalSampleTimes(sample)) + break; + + // Pass samples onto the remotery thread for sending to the viewer + QueueSampleTree(g_Remotery->mq_to_rmt_thread, sample, sample_tree->allocator, sample_tree->threadName, 0, + message->threadProfiler, RMT_FALSE); + rmtMessageQueue_ConsumeNextMessage(metal->mq_to_metal_main, message); + } + + rmt_EndCPUSample(); +} + +RMT_API void _rmt_EndMetalSample(void) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + // Close the timestamp + MetalSample* metal_sample = (MetalSample*)thread_profiler->sampleTrees[RMT_SampleType_Metal]->currentParent; + if (metal_sample->base.recurse_depth > 0) + { + metal_sample->base.recurse_depth--; + } + else + { + if (metal_sample->timestamp != NULL) + MetalTimestamp_End(metal_sample->timestamp); + + // Send to the update loop for ready-polling + if (ThreadProfiler_Pop(thread_profiler, g_Remotery->metal->mq_to_metal_main, (Sample*)metal_sample, 0)) + // Perform ready-polling on popping of the root sample + UpdateMetalFrame(); + } + } +} + +#endif // RMT_USE_METAL + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ + @VULKAN: Vulkan event sampling 
+------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------
+*/
+
+#if RMT_USE_VULKAN
+
+#include <vulkan/vulkan.h>
+
+#define VULKAN_CALL(bind, fn) ((PFN_ ## fn)bind->funcs.fn)
+
+typedef struct VulkanThreadData
+{
+    rmtU32 lastAllocatedQueryIndex;
+
+    // Sample trees in transit in the message queue for release on shutdown
+    Buffer* flushSamples;
+} VulkanThreadData;
+
+static rmtError VulkanThreadData_Create(VulkanThreadData** vulkan_thread_data)
+{
+    assert(vulkan_thread_data != NULL);
+
+    // Allocate space for the Vulkan data
+    rmtTryMalloc(VulkanThreadData, *vulkan_thread_data);
+
+    // Set defaults
+    (*vulkan_thread_data)->lastAllocatedQueryIndex = 0;
+    (*vulkan_thread_data)->flushSamples = NULL;
+
+    rmtTryNew(Buffer, (*vulkan_thread_data)->flushSamples, 8 * 1024);
+
+    return RMT_ERROR_NONE;
+}
+
+static void VulkanThreadData_Destructor(VulkanThreadData* vulkan_thread_data)
+{
+    assert(vulkan_thread_data != NULL);
+    rmtDelete(Buffer, vulkan_thread_data->flushSamples);
+}
+
+typedef struct VulkanSample
+{
+    // IS-A inheritance relationship
+    Sample base;
+
+    // Cached bind and command buffer used to create the sample so that the user doesn't have to pass it
+    struct VulkanBindImpl* bind;
+    VkCommandBuffer commandBuffer;
+
+    // Begin/End timestamp indices in the query heap
+    rmtU32 queryIndex;
+
+} VulkanSample;
+
+static rmtError VulkanSample_Constructor(VulkanSample* sample)
+{
+    assert(sample != NULL);
+
+    // Chain to sample constructor
+    Sample_Constructor((Sample*)sample);
+    sample->base.type = RMT_SampleType_Vulkan;
+    sample->bind = NULL;
+    sample->commandBuffer = NULL;
+    sample->queryIndex = 0;
+
+    return RMT_ERROR_NONE;
+}
+
+static void VulkanSample_Destructor(VulkanSample* sample)
+{
+    Sample_Destructor((Sample*)sample);
+}
+
+typedef struct VulkanBindImpl
+{
+    rmtVulkanBind base;
+    rmtVulkanFunctions funcs;
+
+    // Ring buffer of GPU timestamp destinations for all queries
+    rmtU32 maxNbQueries;
+    VkQueryPool gpuTimestampRingBuffer;
+
+    // CPU-accessible copy destination for all timestamps
+    rmtU64* cpuTimestampRingBuffer;
+
+    // Pointers to samples that expect the result of timestamps
+    VulkanSample** sampleRingBuffer;
+
+    // Read/write positions of the ring buffer allocator, synchronising access to all the ring buffers at once
+    // NOTE(valakor): These are 64-bit instead of 32-bit so that we can reasonably assume they never wrap.
+    // TODO(valakor): Separate by cache line?
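/* Editor's note -- an illustrative sketch, not part of the patch: because these read/write
   positions only ever increase, a logical position is mapped back to a physical ring buffer
   slot by masking with the pool size, which is a power of two:

       rmtU64 write     = LoadAcquire64(&bind->ringBufferWrite);          // monotonic logical position
       rmtU32 physical  = (rmtU32)(write & (bind->maxNbQueries - 1));     // physical slot in the pool
       rmtU32 in_flight = (rmtU32)(write - LoadAcquire64(&bind->ringBufferRead));

   This is the same arithmetic AllocVulkanQueryPair and VulkanMarkFrame use below; keeping
   the counters monotonic is what makes the ABA case impossible. */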
+ rmtAtomicU64 ringBufferRead; + rmtAtomicU64 ringBufferWrite; + + VkSemaphore gpuQuerySemaphore; + + // Convert gpu ticks to us, retrieved from physical device properties + double gpu_ticks_to_us; + + // Queue to the Vulkan main update thread + rmtMessageQueue* mqToVulkanUpdate; + + struct VulkanBindImpl* next; + +} VulkanBindImpl; + +static rmtError CreateQueryPool(VulkanBindImpl* bind, VkDevice vulkan_device, rmtU32 nb_queries) +{ + VkQueryPoolCreateInfo create_info; + memset(&create_info, 0, sizeof(create_info)); + create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + create_info.queryType = VK_QUERY_TYPE_TIMESTAMP; + create_info.queryCount = nb_queries; + + if (VULKAN_CALL(bind, vkCreateQueryPool)(vulkan_device, &create_info, NULL, &bind->gpuTimestampRingBuffer) != VK_SUCCESS) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Failed to create Vulkan Query Pool"); + } + + VULKAN_CALL(bind, vkResetQueryPool)(vulkan_device, bind->gpuTimestampRingBuffer, 0, nb_queries); + + return RMT_ERROR_NONE; +} + +static rmtError CreateQuerySemaphore(VulkanBindImpl* bind, VkDevice vulkan_device) +{ + VkSemaphoreTypeCreateInfoKHR type_info; + memset(&type_info, 0, sizeof(type_info)); + type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR; + type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR; + type_info.initialValue = 0; + + VkSemaphoreCreateInfo create_info; + memset(&create_info, 0, sizeof(create_info)); + create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + create_info.pNext = &type_info; + + if (VULKAN_CALL(bind, vkCreateSemaphore)(vulkan_device, &create_info, NULL, &bind->gpuQuerySemaphore) != VK_SUCCESS) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Failed to create Vulkan Query Semaphore"); + } + + return RMT_ERROR_NONE; +} + +static rmtError CopyVulkanTimestamps(VulkanBindImpl* bind, VkDevice vulkan_device, rmtU32 ring_pos_a, rmtU32 ring_pos_b, double gpu_ticks_to_us, rmtS64 gpu_to_cpu_timestamp_us) +{ + rmtU32 query_index; + VulkanSample** cpu_sample_buffer = bind->sampleRingBuffer; + rmtU64* cpu_timestamps = bind->cpuTimestampRingBuffer; + + rmtU32 query_count = ring_pos_b - ring_pos_a; + rmtU64 query_size = query_count * sizeof(rmtU64); + + if (query_count == 0) + return RMT_ERROR_NONE; + + VULKAN_CALL(bind, vkGetQueryPoolResults)(vulkan_device, bind->gpuTimestampRingBuffer, ring_pos_a, query_count, query_size, cpu_timestamps + ring_pos_a, + sizeof(rmtU64), VK_QUERY_RESULT_64_BIT); + + // Copy all timestamps to their expectant samples + for (query_index = ring_pos_a; query_index < ring_pos_b; query_index += 2) + { + rmtU64 us_start = (rmtU64)(cpu_timestamps[query_index] * gpu_ticks_to_us + gpu_to_cpu_timestamp_us); + rmtU64 us_end = (rmtU64)(cpu_timestamps[query_index + 1] * gpu_ticks_to_us + gpu_to_cpu_timestamp_us); + + VulkanSample* sample = cpu_sample_buffer[query_index >> 1]; + sample->base.us_start = us_start; + Sample_Close(&sample->base, us_end); + sample->base.us_end = us_end; + } + + // Reset the query pool indices + VULKAN_CALL(bind, vkResetQueryPool)(vulkan_device, bind->gpuTimestampRingBuffer, ring_pos_a, query_count); + + return RMT_ERROR_NONE; +} + +static rmtError UpdateGpuTicksToUs(VulkanBindImpl* bind, VkPhysicalDevice vulkan_physical_device) +{ + // TODO(valakor): Is this slow? We could cache timestampPeriod during initialization, but on some devices + // (namely some Apple devices using Vulkan via MoltenVK, potentially others) the value is dynamic and can + // change on every call. 
For more information see: + // https://github.com/KhronosGroup/MoltenVK/blob/main/Docs/MoltenVK_Runtime_UserGuide.md + + VkPhysicalDeviceProperties device_properties; + memset(&device_properties, 0, sizeof(device_properties)); + VULKAN_CALL(bind, vkGetPhysicalDeviceProperties)(vulkan_physical_device, &device_properties); + + float gpu_ns_per_tick = device_properties.limits.timestampPeriod; + bind->gpu_ticks_to_us = gpu_ns_per_tick / 1000.0; + + return RMT_ERROR_NONE; +} + +static rmtError GetTimestampCalibration(VulkanBindImpl* bind, VkPhysicalDevice vulkan_physical_device, VkDevice vulkan_device, double* gpu_ticks_to_us, rmtS64* gpu_to_cpu_timestamp_us) +{ + // TODO(valakor): Honor RMT_GPU_CPU_SYNC_SECONDS? It's unclear to me how expensive vkGetCalibratedTimestampsEXT is + // on all supported platforms, but at least on my Windows/NVIDIA machine it was on the order of 100-150us. + + rmtU64 gpu_timestamp_ticks; + rmtU64 cpu_timestamp_ticks; + rmtU64 gpu_timestamp_us; + rmtU64 cpu_timestamp_us; + float gpu_tick_period; + + // Always query a device timestamp + rmtU32 timestamp_count = 1; + rmtU64 max_deviation; + rmtU64 timestamps[2]; + VkCalibratedTimestampInfoEXT timestamp_infos[2]; + memset(timestamp_infos, 0, sizeof(timestamp_infos)); + timestamp_infos[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + timestamp_infos[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + + // TODO(valakor): Reconsider whether we bother asking Vulkan to give us a CPU timestamp at all. It'd be much + // simpler to just query the device timestamp (supported by all platforms) and manually query our timer instead + // of all this platform-specific code. All we need is something "close enough". + + // Potentially also query a cpu timestamp if supported +#if defined(RMT_PLATFORM_WINDOWS) + timestamp_count = 2; + timestamp_infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + timestamp_infos[1].timeDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +#elif 0 // defined(RMT_PLATFORM_MACOS) + // TODO(valakor): We have to fall back to manually querying CPU time due to the following issue: + // On Apple platforms MoltenVK reports support for VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT, which matches the time + // domain of mach_continuous_time(). To support mach_absolute_time() Vulkan would have to extend the available + // time domains to include something like "VK_TIME_DOMAIN_CLOCK_UPTIME_RAW_EXT". See the comments here: + // https://github.com/KhronosGroup/MoltenVK/blob/main/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm + // + // Alternatively, Remotery could switch to using mach_continuous_time(). The difference between the two is that + // mach_continuous_time() (CLOCK_MONOTONIC_RAW) includes system sleep time, whereas mach_absolute_time() + // (CLOCK_UPTIME_RAW) does not. I'm not 100% convinced that's what we would want, but I think it is technically + // more secure. + timestamp_count = 2; + timestamp_infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + timestamp_infos[1].timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +#else + // On Linux Remotery uses CLOCK_REALTIME (though it probably shouldn't), but Vulkan only provides time domains for + // CLOCK_MONOTONIC and CLOCK_MONOTONIC_RAW. For now we'll just query the CPU here manually and hope it's close enough. + timestamp_count = 1; +#endif + + // TODO(valakor): Consider taking max_deviation into account. 
Docs state that users may want to call vkGetCalibratedTimestamps + // multiple times in a row until retrieving a max deviation that is "acceptable". We could just call it a set number of + // times and take the min, or determine a reasonable average during init and ensure we get something close to that here. + + if (VULKAN_CALL(bind, vkGetCalibratedTimestampsEXT)(vulkan_device, timestamp_count, timestamp_infos, timestamps, &max_deviation) != VK_SUCCESS) + { + return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Failed to get Vulkan calibrated timestamps"); + } + + // Convert CPU ticks to microseconds, offset from the global timer start +#if defined(RMT_PLATFORM_WINDOWS) // || defined(RMT_PLATFORM_MACOS) + cpu_timestamp_ticks = timestamps[1]; + cpu_timestamp_us = usTimer_FromRawTicks(&g_Remotery->timer, cpu_timestamp_ticks); +#else + cpu_timestamp_us = usTimer_Get(&g_Remotery->timer); +#endif + + UpdateGpuTicksToUs(bind, vulkan_physical_device); + *gpu_ticks_to_us = bind->gpu_ticks_to_us; + + // Convert GPU ticks to microseconds + gpu_timestamp_ticks = timestamps[0]; + gpu_timestamp_us = (rmtU64)(gpu_timestamp_ticks * bind->gpu_ticks_to_us); + + // And we now have the offset from GPU microseconds to CPU microseconds + *gpu_to_cpu_timestamp_us = cpu_timestamp_us - gpu_timestamp_us; + + return RMT_ERROR_NONE; +} + +static rmtError VulkanMarkFrame(VulkanBindImpl* bind, rmtBool recurse) +{ + if (bind == NULL) + { + return RMT_ERROR_NONE; + } + + VkPhysicalDevice vulkan_physical_device = (VkPhysicalDevice)bind->base.physical_device; + VkDevice vulkan_device = (VkDevice)bind->base.device; + VkQueue vulkan_queue = (VkQueue)bind->base.queue; + + rmtU64 index_mask = (rmtU64)bind->maxNbQueries - 1; + rmtU64 current_read_cpu = LoadAcquire64(&bind->ringBufferRead); + rmtU64 current_write_cpu = LoadAcquire64(&bind->ringBufferWrite); + rmtU32 current_read_cpu_index = (rmtU32)(current_read_cpu & index_mask); + + // Has the GPU processed any writes? 
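/* Editor's note -- an illustrative summary, not part of the patch: the timeline semaphore
   mirrors the CPU write cursor onto the GPU timeline. Because the signal is submitted after
   the command buffers that write the queries, the counters obey

       ringBufferRead  <=  semaphore value (current_write_gpu)  <=  ringBufferWrite

   and any query whose logical position lies below the semaphore value has been written by
   the GPU and is safe to read back with vkGetQueryPoolResults. */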
+    rmtU64 current_write_gpu = 0;
+    if (VULKAN_CALL(bind, vkGetSemaphoreCounterValue)(vulkan_device, bind->gpuQuerySemaphore, &current_write_gpu) != VK_SUCCESS)
+    {
+        return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Failed to get Vulkan Semaphore value");
+    }
+
+    if (current_write_cpu > current_write_gpu)
+    {
+        // Tell the GPU where the CPU write position is
+        // NOTE(valakor): Vulkan spec states that signalling a timeline semaphore must strictly increase its value
+        VkTimelineSemaphoreSubmitInfoKHR semaphore_submit_info;
+        memset(&semaphore_submit_info, 0, sizeof(semaphore_submit_info));
+        semaphore_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+        semaphore_submit_info.signalSemaphoreValueCount = 1;
+        semaphore_submit_info.pSignalSemaphoreValues = &current_write_cpu;
+
+        VkSubmitInfo submit_info;
+        memset(&submit_info, 0, sizeof(submit_info));
+        submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+        submit_info.pNext = &semaphore_submit_info;
+        submit_info.signalSemaphoreCount = 1;
+        submit_info.pSignalSemaphores = &bind->gpuQuerySemaphore;
+        if (VULKAN_CALL(bind, vkQueueSubmit)(vulkan_queue, 1, &submit_info, NULL) != VK_SUCCESS)
+        {
+            return rmtMakeError(RMT_ERROR_RESOURCE_ACCESS_FAIL, "Failed to submit Vulkan Semaphore update to queue");
+        }
+    }
+
+    if (current_write_gpu > current_read_cpu)
+    {
+        double gpu_ticks_to_us;
+        rmtS64 gpu_to_cpu_timestamp_us;
+
+        // Physical ring buffer positions
+        rmtU32 ring_pos_a = current_read_cpu_index;
+        rmtU32 ring_pos_b = (rmtU32)(current_write_gpu & index_mask);
+
+        rmtTry(GetTimestampCalibration(bind, vulkan_physical_device, vulkan_device, &gpu_ticks_to_us, &gpu_to_cpu_timestamp_us));
+
+        // Copy resulting timestamps to their samples
+        // Will have to split the copies into two passes if they cross the ring buffer wrap-around
+        if (ring_pos_b < ring_pos_a)
+        {
+            rmtTry(CopyVulkanTimestamps(bind, vulkan_device, ring_pos_a, bind->maxNbQueries, gpu_ticks_to_us, gpu_to_cpu_timestamp_us));
+            rmtTry(CopyVulkanTimestamps(bind, vulkan_device, 0, ring_pos_b, gpu_ticks_to_us, gpu_to_cpu_timestamp_us));
+        }
+        else
+        {
+            rmtTry(CopyVulkanTimestamps(bind, vulkan_device, ring_pos_a, ring_pos_b, gpu_ticks_to_us, gpu_to_cpu_timestamp_us));
+        }
+
+        // Release the ring buffer entries just processed
+        StoreRelease64(&bind->ringBufferRead, current_write_gpu);
+    }
+
+    // Attempt to empty the queue of complete message trees
+    Message* message;
+    while ((message = rmtMessageQueue_PeekNextMessage(bind->mqToVulkanUpdate)))
+    {
+        Msg_SampleTree* msg_sample_tree;
+        Sample* root_sample;
+
+        // Ensure only Vulkan sample tree messages come through here
+        assert(message->id == MsgID_SampleTree);
+        msg_sample_tree = (Msg_SampleTree*)message->payload;
+        root_sample = msg_sample_tree->rootSample;
+        assert(root_sample->type == RMT_SampleType_Vulkan);
+
+        // If the last-allocated query in this tree has been GPU-processed, it's now safe to send the tree to the Remotery thread
+        rmtU32 sample_tree_write_index = msg_sample_tree->userData;
+        rmtU64 sample_tree_write = (rmtU64)(sample_tree_write_index - current_read_cpu_index) + current_read_cpu;
+        if (current_write_gpu > sample_tree_write)
+        {
+            QueueSampleTree(g_Remotery->mq_to_rmt_thread, root_sample, msg_sample_tree->allocator, msg_sample_tree->threadName,
+                            0, message->threadProfiler, RMT_FALSE);
+            rmtMessageQueue_ConsumeNextMessage(bind->mqToVulkanUpdate, message);
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    // Chain to the next bind here so that root calling code doesn't need to know the definition of VulkanBindImpl
+    if (recurse)
+ { + rmtTry(VulkanMarkFrame(bind->next, recurse)); + } + + return RMT_ERROR_NONE; +} + +RMT_API rmtError _rmt_BindVulkan(void* instance, void* physical_device, void* device, void* queue, const rmtVulkanFunctions* funcs, rmtVulkanBind** out_bind) +{ + VulkanBindImpl* bind; + VkInstance vulkan_instance = (VkInstance)instance; + VkPhysicalDevice vulkan_physical_device = (VkPhysicalDevice)physical_device; + VkDevice vulkan_device = (VkDevice)device; + VkQueue vulkan_queue = (VkQueue)queue; + + if (g_Remotery == NULL) + return RMT_ERROR_REMOTERY_NOT_CREATED; + + if (instance == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing instance"); + + if (physical_device == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing physical_device"); + + if (device == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing device"); + + if (queue == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing queue"); + + if (funcs == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing funcs"); + + if (out_bind == NULL) + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing out_bind"); + + #define CHECK_VK_FUNC(fn) \ + if (funcs->fn == NULL) \ + return rmtMakeError(RMT_ERROR_INVALID_INPUT, "Missing " #fn) + + CHECK_VK_FUNC(vkGetPhysicalDeviceProperties); + CHECK_VK_FUNC(vkQueueSubmit); + CHECK_VK_FUNC(vkQueueWaitIdle); + CHECK_VK_FUNC(vkCreateQueryPool); + CHECK_VK_FUNC(vkDestroyQueryPool); + CHECK_VK_FUNC(vkResetQueryPool); + CHECK_VK_FUNC(vkGetQueryPoolResults); + CHECK_VK_FUNC(vkCmdWriteTimestamp); + CHECK_VK_FUNC(vkCreateSemaphore); + CHECK_VK_FUNC(vkDestroySemaphore); + CHECK_VK_FUNC(vkSignalSemaphore); + CHECK_VK_FUNC(vkGetSemaphoreCounterValue); + CHECK_VK_FUNC(vkGetCalibratedTimestampsEXT); + +#undef CHECK_VK_FUNC + + // Allocate the bind container + // TODO(valakor): If anything after this fails we'll leak this bind instance + rmtTryMalloc(VulkanBindImpl, bind); + + // Set default state + bind->base.physical_device = physical_device; + bind->base.device = device; + bind->base.queue = queue; + bind->funcs = *funcs; +#ifdef RMT_PLATFORM_MACOS + // NOTE(valakor): Vulkan on MacOS via MoltenVK only supports timestamp query pools of up to 4k 64-bit queries. See + // https://github.com/KhronosGroup/MoltenVK/blob/main/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm + bind->maxNbQueries = 4 * 1024; +#else + bind->maxNbQueries = 32 * 1024; +#endif + bind->gpuTimestampRingBuffer = NULL; + bind->cpuTimestampRingBuffer = NULL; + bind->sampleRingBuffer = NULL; + bind->ringBufferRead = 0; + bind->ringBufferWrite = 0; + bind->gpuQuerySemaphore = NULL; + bind->gpu_ticks_to_us = 1.0; + bind->mqToVulkanUpdate = NULL; + bind->next = NULL; + + // Create the independent ring buffer storage items + // TODO(valakor): Leave space beetween start and end to stop invalidating cache lines? 
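/* Editor's note -- an illustrative usage sketch, not part of the patch. It assumes a
   rmt_BindVulkan convenience macro is declared in Remotery.h alongside rmt_BindD3D12;
   only _rmt_BindVulkan above is visible in this hunk. A caller fills in every function
   pointer validated by CHECK_VK_FUNC before binding:

       rmtVulkanFunctions funcs = {0};
       funcs.vkQueueSubmit         = vkQueueSubmit;
       funcs.vkGetQueryPoolResults = vkGetQueryPoolResults;
       // ...and so on for each pointer checked above...

       rmtVulkanBind* bind = NULL;
       rmtError err = rmt_BindVulkan(instance, physical_device, device, queue, &funcs, &bind);

   All six arguments are validated before the bind container is allocated. */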
+ // NOTE(valakor): ABA impossible due to non-wrapping ring buffer indices + rmtTry(CreateQueryPool(bind, vulkan_device, bind->maxNbQueries)); + rmtTryMallocArray(VulkanSample*, bind->sampleRingBuffer, bind->maxNbQueries / 2); + rmtTryMallocArray(rmtU64, bind->cpuTimestampRingBuffer, bind->maxNbQueries); + rmtTry(CreateQuerySemaphore(bind, vulkan_device)); + + rmtTryNew(rmtMessageQueue, bind->mqToVulkanUpdate, g_Settings.messageQueueSizeInBytes); + + // Add to the global linked list of binds + { + mtxLock(&g_Remotery->vulkanBindsMutex); + bind->next = g_Remotery->vulkanBinds; + g_Remotery->vulkanBinds = bind; + mtxUnlock(&g_Remotery->vulkanBindsMutex); + } + + *out_bind = &bind->base; + + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_UnbindVulkan(rmtVulkanBind* bind) +{ + VulkanBindImpl* vulkan_bind = (VulkanBindImpl*)bind; + VkDevice vulkan_device = (VkDevice)vulkan_bind->base.device; + VkQueue vulkan_queue = (VkQueue)vulkan_bind->base.queue; + + assert(bind != NULL); + + // Remove from the linked list + { + mtxLock(&g_Remotery->vulkanBindsMutex); + VulkanBindImpl* cur = g_Remotery->vulkanBinds; + VulkanBindImpl* prev = NULL; + for ( ; cur != NULL; cur = cur->next) + { + if (cur == vulkan_bind) + { + if (prev != NULL) + { + prev->next = cur->next; + } + else + { + g_Remotery->vulkanBinds = cur->next; + } + + break; + } + } + mtxUnlock(&g_Remotery->vulkanBindsMutex); + } + + // Ensure all samples submitted to the GPU are consumed for clean shutdown + if (LoadAcquire64(&vulkan_bind->ringBufferWrite) > LoadAcquire64(&vulkan_bind->ringBufferRead)) + { + VulkanMarkFrame(vulkan_bind, RMT_FALSE); + VULKAN_CALL(vulkan_bind, vkQueueWaitIdle)(vulkan_queue); + VulkanMarkFrame(vulkan_bind, RMT_FALSE); + } + + // Clean up bind resources + + rmtDelete(rmtMessageQueue, vulkan_bind->mqToVulkanUpdate); + + if (vulkan_bind->gpuQuerySemaphore != NULL) + { + VULKAN_CALL(vulkan_bind, vkDestroySemaphore)(vulkan_device, vulkan_bind->gpuQuerySemaphore, NULL); + } + + rmtFree(vulkan_bind->sampleRingBuffer); + rmtFree(vulkan_bind->cpuTimestampRingBuffer); + + if (vulkan_bind->gpuTimestampRingBuffer != NULL) + { + VULKAN_CALL(vulkan_bind, vkDestroyQueryPool)(vulkan_device, vulkan_bind->gpuTimestampRingBuffer, NULL); + } +} + +static rmtError AllocateVulkanSampleTree(SampleTree** vulkan_tree) +{ + rmtTryNew(SampleTree, *vulkan_tree, sizeof(VulkanSample), (ObjConstructor)VulkanSample_Constructor, + (ObjDestructor)VulkanSample_Destructor); + return RMT_ERROR_NONE; +} + +static rmtError AllocVulkanQueryPair(VulkanBindImpl* vulkan_bind, rmtU32* out_allocation_index) +{ + // Check for overflow against a tail which is only ever written by one thread + rmtU64 read = LoadAcquire64(&vulkan_bind->ringBufferRead); + rmtU64 write = LoadAcquire64(&vulkan_bind->ringBufferWrite); + rmtU32 nb_queries = (rmtU32)(write - read); + rmtU32 queries_left = vulkan_bind->maxNbQueries - nb_queries; + if (queries_left < 2) + { + return rmtMakeError(RMT_ERROR_RESOURCE_CREATE_FAIL, "Vulkan query ring buffer overflow"); + } + + rmtU64 index_mask = (rmtU64)vulkan_bind->maxNbQueries - 1; + *out_allocation_index = (rmtU32)(AtomicAddU64(&vulkan_bind->ringBufferWrite, 2) & index_mask); + return RMT_ERROR_NONE; +} + +RMT_API void _rmt_BeginVulkanSample(rmtVulkanBind* bind, void* command_buffer, rmtPStr name, rmtU32* hash_cache) +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL || bind == NULL) + return; + + assert(command_buffer != NULL); + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == 
RMT_ERROR_NONE) + { + Sample* sample; + rmtU32 name_hash; + SampleTree** vulkan_tree; + + name_hash = ThreadProfiler_GetNameHash(thread_profiler, g_Remotery->mq_to_rmt_thread, name, hash_cache); + + // Create the Vulkan tree on-demand as the tree needs an up-front-created root. + // This is not possible to create on initialisation as a Vulkan binding is not yet available. + vulkan_tree = &thread_profiler->sampleTrees[RMT_SampleType_Vulkan]; + if (*vulkan_tree == NULL) + { + AllocateVulkanSampleTree(vulkan_tree); + } + + // Push the sample and activate the timestamp + if (ThreadProfiler_Push(*vulkan_tree, name_hash, 0, &sample) == RMT_ERROR_NONE) + { + rmtError error; + + VulkanBindImpl* vulkan_bind = (VulkanBindImpl*)bind; + VkCommandBuffer vulkan_command_buffer = (VkCommandBuffer)command_buffer; + + VulkanSample* vulkan_sample = (VulkanSample*)sample; + vulkan_sample->bind = vulkan_bind; + vulkan_sample->commandBuffer = vulkan_command_buffer; + vulkan_sample->base.usGpuIssueOnCpu = usTimer_Get(&g_Remotery->timer); + + error = AllocVulkanQueryPair(vulkan_bind, &vulkan_sample->queryIndex); + if (error == RMT_ERROR_NONE) + { + rmtU32 physical_query_index = vulkan_sample->queryIndex & (vulkan_bind->maxNbQueries - 1); + VULKAN_CALL(vulkan_bind, vkCmdWriteTimestamp)(vulkan_command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vulkan_bind->gpuTimestampRingBuffer, physical_query_index); + + // Track which Vulkan sample expects the timestamp results + vulkan_bind->sampleRingBuffer[physical_query_index / 2] = vulkan_sample; + + // Keep track of the last allocated query so we can check when the GPU has finished with them all + thread_profiler->vulkanThreadData->lastAllocatedQueryIndex = vulkan_sample->queryIndex; + } + else + { + // SET QUERY INDEX TO INVALID so that pop doesn't release it + } + } + } +} + +RMT_API void _rmt_EndVulkanSample() +{ + ThreadProfiler* thread_profiler; + + if (g_Remotery == NULL) + return; + + if (ThreadProfilers_GetCurrentThreadProfiler(g_Remotery->threadProfilers, &thread_profiler) == RMT_ERROR_NONE) + { + VulkanThreadData* vulkan_thread_data = thread_profiler->vulkanThreadData; + VulkanSample* vulkan_sample; + + // Sample tree isn't there if Vulkan hasn't been initialised + SampleTree* vulkan_tree = thread_profiler->sampleTrees[RMT_SampleType_Vulkan]; + if (vulkan_tree == NULL) + { + return; + } + + // Close the timestamp + vulkan_sample = (VulkanSample*)vulkan_tree->currentParent; + if (vulkan_sample->base.recurse_depth > 0) + { + vulkan_sample->base.recurse_depth--; + } + else + { + // Issue the timestamp query for the end of the sample + VulkanBindImpl* vulkan_bind = vulkan_sample->bind; + VkCommandBuffer vulkan_command_buffer = vulkan_sample->commandBuffer; + rmtU32 query_index = vulkan_sample->queryIndex & (vulkan_bind->maxNbQueries - 1); + VULKAN_CALL(vulkan_bind, vkCmdWriteTimestamp)(vulkan_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + vulkan_bind->gpuTimestampRingBuffer, query_index + 1); + + if (ThreadProfiler_Pop(thread_profiler, vulkan_bind->mqToVulkanUpdate, (Sample*)vulkan_sample, + vulkan_thread_data->lastAllocatedQueryIndex)) + { + } + } + } +} + +#endif // RMT_USE_VULKAN + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +@SAMPLEAPI: Sample API for user callbacks 
+------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// Iterator +RMT_API void _rmt_IterateChildren(rmtSampleIterator* iterator, rmtSample* sample) +{ + iterator->sample = 0; + iterator->initial = sample != NULL ? sample->first_child : 0; +} + +RMT_API rmtBool _rmt_IterateNext(rmtSampleIterator* iter) +{ + if (iter->initial != NULL) + { + iter->sample = iter->initial; + iter->initial = 0; + } + else + { + if (iter->sample != NULL) + iter->sample = iter->sample->next_sibling; + } + + return iter->sample != NULL ? RMT_TRUE : RMT_FALSE; +} + +// Sample tree accessors +RMT_API const char* _rmt_SampleTreeGetThreadName(rmtSampleTree* sample_tree) +{ + return sample_tree->threadName; +} + +RMT_API rmtSample* _rmt_SampleTreeGetRootSample(rmtSampleTree* sample_tree) +{ + return sample_tree->rootSample; +} + +// Sample accessors +RMT_API const char* _rmt_SampleGetName(rmtSample* sample) +{ + const char* name = StringTable_Find(g_Remotery->string_table, sample->name_hash); + if (name == NULL) + { + return "null"; + } + return name; +} + +RMT_API rmtU32 _rmt_SampleGetNameHash(rmtSample* sample) +{ + return sample->name_hash; +} + +RMT_API rmtU32 _rmt_SampleGetCallCount(rmtSample* sample) +{ + return sample->call_count; +} + +RMT_API rmtU64 _rmt_SampleGetStart(rmtSample* sample) +{ + return sample->us_start; +} + +RMT_API rmtU64 _rmt_SampleGetTime(rmtSample* sample) +{ + return sample->us_length; +} + +RMT_API rmtU64 _rmt_SampleGetSelfTime(rmtSample* sample) +{ + return (rmtU64)maxS64(sample->us_length - sample->us_sampled_length, 0); +} + +RMT_API rmtSampleType _rmt_SampleGetType(rmtSample* sample) +{ + return sample->type; +} + +RMT_API void _rmt_SampleGetColour(rmtSample* sample, rmtU8* r, rmtU8* g, rmtU8* b) +{ + *r = sample->uniqueColour[0]; + *g = sample->uniqueColour[1]; + *b = sample->uniqueColour[2]; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +@PROPERTYAPI: Property API for user callbacks +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +// Iterator +RMT_API void _rmt_PropertyIterateChildren(rmtPropertyIterator* iterator, rmtProperty* property) +{ + iterator->property = 0; + iterator->initial = property != NULL ? property->firstChild : 0; +} + +RMT_API rmtBool _rmt_PropertyIterateNext(rmtPropertyIterator* iter) +{ + if (iter->initial != NULL) + { + iter->property = iter->initial; + iter->initial = 0; + } + else + { + if (iter->property != NULL) + iter->property = iter->property->nextSibling; + } + + return iter->property != NULL ? 
RMT_TRUE : RMT_FALSE; +} + +// Property accessors +RMT_API const char* _rmt_PropertyGetName(rmtProperty* property) +{ + return property->name; +} + +RMT_API const char* _rmt_PropertyGetDescription(rmtProperty* property) +{ + return property->description; +} + +RMT_API rmtU32 _rmt_PropertyGetNameHash(rmtProperty* property) +{ + return property->nameHash; +} + +RMT_API rmtPropertyType _rmt_PropertyGetType(rmtProperty* property) +{ + return property->type; +} + +RMT_API rmtPropertyValue _rmt_PropertyGetValue(rmtProperty* property) +{ + return property->value; +} + +/* +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +@PROPERTIES: Property API +------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------------------ +*/ + +static void RegisterProperty(rmtProperty* property, rmtBool can_lock) +{ + if (property->initialised == RMT_FALSE) + { + // Apply for a lock once at the start of the recursive walk + if (can_lock) + { + mtxLock(&g_Remotery->propertyMutex); + } + + // Multiple threads accessing the same property can apply for the lock at the same time as the `initialised` property for + // each of them may not be set yet. One thread only will get the lock successfully while the others will only come through + // here when the first thread has finished initialising. The first thread through will have `initialised` set to RMT_FALSE + // while all other threads will see it in its initialised state. Skip those so that we don't register multiple times. 
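/* Editor's note -- an illustrative sketch, not part of the patch: compressed to its shape,
   the paragraph above describes classic double-checked registration:

       if (property->initialised == RMT_FALSE)       // unlocked fast-path check
       {
           mtxLock(&g_Remotery->propertyMutex);
           if (property->initialised == RMT_FALSE)   // re-check under the lock
           {
               // ...link into parent, hash the name, mark initialised...
           }
           mtxUnlock(&g_Remotery->propertyMutex);
       }

   Only the first thread through performs the registration; later threads either skip at the
   unlocked check or see initialised == RMT_TRUE once they acquire the lock. */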
+ if (property->initialised == RMT_FALSE) + { + rmtU32 name_len; + + // With no parent, add this to the root property + rmtProperty* parent_property = property->parent; + if (parent_property == NULL) + { + property->parent = &g_Remotery->rootProperty; + parent_property = property->parent; + } + + // Walk up to parent properties first in case they haven't been registered + RegisterProperty(parent_property, RMT_FALSE); + + // Link this property into the parent's list + if (parent_property->firstChild != NULL) + { + parent_property->lastChild->nextSibling = property; + parent_property->lastChild = property; + } + else + { + parent_property->firstChild = property; + parent_property->lastChild = property; + } + + // Calculate the name hash and send it to the viewer + name_len = strnlen_s(property->name, 256); + property->nameHash = _rmt_HashString32(property->name, name_len, 0); + QueueAddToStringTable(g_Remotery->mq_to_rmt_thread, property->nameHash, property->name, name_len, NULL); + + // Generate a unique ID for this property in the tree + property->uniqueID = parent_property->uniqueID; + property->uniqueID = HashCombine(property->uniqueID, property->nameHash); + + property->initialised = RMT_TRUE; + } + + // Unlock on the way out of recursive walk + if (can_lock) + { + mtxUnlock(&g_Remotery->propertyMutex); + } + } +} + +RMT_API void _rmt_PropertySetValue(rmtProperty* property) +{ + if (g_Remotery == NULL) + { + return; + } + + RegisterProperty(property, RMT_TRUE); + + // on this thread, create a new sample that encodes the value just set + + // send the sample to remotery UI and disk log + + // value resets and sets don't have delta values, really +} + +RMT_API void _rmt_PropertyAddValue(rmtProperty* property, rmtPropertyValue add_value) +{ + if (g_Remotery == NULL) + { + return; + } + + RegisterProperty(property, RMT_TRUE); + + RMT_UNREFERENCED_PARAMETER(add_value); + + // use `add_value` to determine how much this property was changed + + // on this thread, create a new sample that encodes the delta and parents itself to `property` + // could also encode the current value of the property at this point + + // send the sample to remotery UI and disk log +} + +static rmtError TakePropertySnapshot(rmtProperty* property, PropertySnapshot* parent_snapshot, PropertySnapshot** first_snapshot, PropertySnapshot** prev_snapshot, rmtU32 depth) +{ + rmtError error; + rmtProperty* child_property; + + // Allocate some state for the property + PropertySnapshot* snapshot; + error = ObjectAllocator_Alloc(g_Remotery->propertyAllocator, (void**)&snapshot); + if (error != RMT_ERROR_NONE) + { + return error; + } + + // Snapshot the property + snapshot->type = property->type; + snapshot->value = property->value; + snapshot->prevValue = property->prevValue; + snapshot->prevValueFrame = property->prevValueFrame; + snapshot->nameHash = property->nameHash; + snapshot->uniqueID = property->uniqueID; + snapshot->nbChildren = 0; + snapshot->depth = (rmtU8)depth; + snapshot->nextSnapshot = NULL; + + // Keep count of the number of children in the parent + if (parent_snapshot != NULL) + { + parent_snapshot->nbChildren++; + } + + // Link into the linear list + if (*first_snapshot == NULL) + { + *first_snapshot = snapshot; + } + if (*prev_snapshot != NULL) + { + (*prev_snapshot)->nextSnapshot = snapshot; + } + *prev_snapshot = snapshot; + + // Snapshot the children + for (child_property = property->firstChild; child_property != NULL; child_property = child_property->nextSibling) + { + error = 
TakePropertySnapshot(child_property, snapshot, first_snapshot, prev_snapshot, depth + 1); + if (error != RMT_ERROR_NONE) + { + return error; + } + } + + return RMT_ERROR_NONE; +} + +RMT_API rmtError _rmt_PropertySnapshotAll() +{ + rmtError error; + PropertySnapshot* first_snapshot; + PropertySnapshot* prev_snapshot; + Msg_PropertySnapshot* payload; + Message* message; + rmtU32 nb_snapshot_allocs; + + if (g_Remotery == NULL) + { + return RMT_ERROR_REMOTERY_NOT_CREATED; + } + + // Don't do anything if any properties haven't been registered yet + if (g_Remotery->rootProperty.firstChild == NULL) + { + return RMT_ERROR_NONE; + } + + // Mark current allocation count so we can quickly calculate the number of snapshots being sent + nb_snapshot_allocs = g_Remotery->propertyAllocator->nb_inuse; + + // Snapshot from the root into a linear list + first_snapshot = NULL; + prev_snapshot = NULL; + mtxLock(&g_Remotery->propertyMutex); + error = TakePropertySnapshot(&g_Remotery->rootProperty, NULL, &first_snapshot, &prev_snapshot, 0); + + if (g_Settings.snapshot_callback != NULL) + { + g_Settings.snapshot_callback(g_Settings.snapshot_context, &g_Remotery->rootProperty); + } + + mtxUnlock(&g_Remotery->propertyMutex); + if (error != RMT_ERROR_NONE) + { + FreePropertySnapshots(first_snapshot); + return error; + } + + // Attempt to allocate a message for sending the snapshot to the viewer + message = rmtMessageQueue_AllocMessage(g_Remotery->mq_to_rmt_thread, sizeof(Msg_PropertySnapshot), NULL); + if (message == NULL) + { + FreePropertySnapshots(first_snapshot); + return RMT_ERROR_UNKNOWN; + } + + // Populate and commit + payload = (Msg_PropertySnapshot*)message->payload; + payload->rootSnapshot = first_snapshot; + payload->nbSnapshots = g_Remotery->propertyAllocator->nb_inuse - nb_snapshot_allocs; + payload->propertyFrame = g_Remotery->propertyFrame; + rmtMessageQueue_CommitMessage(message, MsgID_PropertySnapshot); + + return RMT_ERROR_NONE; +} + +static void PropertyFrameReset(Remotery* rmt, rmtProperty* first_property) +{ + rmtProperty* property; + for (property = first_property; property != NULL; property = property->nextSibling) + { + PropertyFrameReset(rmt, property->firstChild); + + // TODO(don): It might actually be quicker to sign-extend assignments but this gives me a nice debug hook for now + rmtBool changed = RMT_FALSE; + switch (property->type) + { + case RMT_PropertyType_rmtGroup: + break; + + case RMT_PropertyType_rmtBool: + changed = property->lastFrameValue.Bool != property->value.Bool; + break; + + case RMT_PropertyType_rmtS32: + case RMT_PropertyType_rmtU32: + case RMT_PropertyType_rmtF32: + changed = property->lastFrameValue.U32 != property->value.U32; + break; + + case RMT_PropertyType_rmtS64: + case RMT_PropertyType_rmtU64: + case RMT_PropertyType_rmtF64: + changed = property->lastFrameValue.U64 != property->value.U64; + break; + } + + if (changed) + { + property->prevValue = property->lastFrameValue; + property->prevValueFrame = rmt->propertyFrame; + } + + property->lastFrameValue = property->value; + + if ((property->flags & RMT_PropertyFlags_FrameReset) != 0) + { + property->value = property->defaultValue; + } + } +} + +RMT_API void _rmt_PropertyFrameResetAll() +{ + if (g_Remotery == NULL) + { + return; + } + + mtxLock(&g_Remotery->propertyMutex); + PropertyFrameReset(g_Remotery, g_Remotery->rootProperty.firstChild); + mtxUnlock(&g_Remotery->propertyMutex); + + g_Remotery->propertyFrame++; +} + +#endif // RMT_ENABLED diff --git a/remotery/lib/Remotery.h b/remotery/lib/Remotery.h new 
file mode 100644
index 0000000..4fe5311
--- /dev/null
+++ b/remotery/lib/Remotery.h
@@ -0,0 +1,1216 @@
+
+/*
+Copyright 2014-2022 Celtoys Ltd
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+/*
+
+Compiling
+---------
+
+* Windows (MSVC) - add lib/Remotery.c and lib/Remotery.h to your program. Set include
+  directories to add the Remotery/lib path. The required library ws2_32.lib should be picked
+  up through the use of the #pragma comment(lib, "ws2_32.lib") directive in Remotery.c.
+
+* Mac OS X (XCode) - simply add lib/Remotery.c and lib/Remotery.h to your program.
+
+* Linux (GCC) - add the source in the lib folder. Compilation of the code requires -pthread for
+  library linkage. For example, to compile the sample, run: cc lib/Remotery.c sample/sample.c
+  -I lib -pthread -lm
+
+* Vulkan - Ensure your include directories are set such that the Vulkan headers can be
+  included with the statement: #include <vulkan/vulkan.h>. Currently the Vulkan implementation
+  requires either Vulkan 1.2+ with the "hostQueryReset" and "timelineSemaphore" features enabled,
+  or earlier versions with the "VK_EXT_host_query_reset" and "VK_KHR_timeline_semaphore" extensions. The
+  extension "VK_EXT_calibrated_timestamps" is also always required.
+
+You can define some extra macros to modify what features are compiled into Remotery. These are
+documented just below this comment.
+
+*/
+
+
+#ifndef RMT_INCLUDED_H
+#define RMT_INCLUDED_H
+
+
+// Set to 0 to not include any bits of Remotery in your build
+#ifndef RMT_ENABLED
+#define RMT_ENABLED 1
+#endif
+
+// Help performance of the server sending data to the client by marking this machine as little-endian
+#ifndef RMT_ASSUME_LITTLE_ENDIAN
+#define RMT_ASSUME_LITTLE_ENDIAN 0
+#endif
+
+// Used by the Celtoys TinyCRT library (not released yet)
+#ifndef RMT_USE_TINYCRT
+#define RMT_USE_TINYCRT 0
+#endif
+
+// Assuming CUDA headers/libs are set up, allow CUDA profiling
+#ifndef RMT_USE_CUDA
+#define RMT_USE_CUDA 0
+#endif
+
+// Assuming Direct3D 11 headers/libs are set up, allow D3D11 profiling
+#ifndef RMT_USE_D3D11
+#define RMT_USE_D3D11 0
+#endif
+
+// Allow D3D12 profiling
+#ifndef RMT_USE_D3D12
+#define RMT_USE_D3D12 0
+#endif
+
+// Allow OpenGL profiling
+#ifndef RMT_USE_OPENGL
+#define RMT_USE_OPENGL 0
+#endif
+
+// Allow Metal profiling
+#ifndef RMT_USE_METAL
+#define RMT_USE_METAL 0
+#endif
+
+// Allow Vulkan profiling
+#ifndef RMT_USE_VULKAN
+#define RMT_USE_VULKAN 0
+#endif
+
+// Initially use POSIX thread names to name threads instead of Thread0, 1, ...
+#ifndef RMT_USE_POSIX_THREADNAMES
+#define RMT_USE_POSIX_THREADNAMES 0
+#endif
+
+// How many times we spin data back and forth between CPU & GPU
+// to calculate average RTT (Roundtrip Time). Cannot be 0.
+// Affects OpenGL & D3D11
+#ifndef RMT_GPU_CPU_SYNC_NUM_ITERATIONS
+#define RMT_GPU_CPU_SYNC_NUM_ITERATIONS 16
+#endif
+
+// Time in seconds between each resync to compensate for drifting between GPU & CPU timers,
+// effects of power saving, etc. Resyncs can cause stutter, lag spikes, stalls.
+// Set to 0 for never.
+// Affects OpenGL & D3D11 +#ifndef RMT_GPU_CPU_SYNC_SECONDS +#define RMT_GPU_CPU_SYNC_SECONDS 30 +#endif + +// Whether we should automatically resync if we detect a timer disjoint (e.g. +// changed from AC power to battery, GPU is overheating, or throttling up/down +// due to laptop savings events). Set it to 0 to avoid resync in such events. +// Useful if for some odd reason a driver reports a lot of disjoints. +// Affects D3D11 +#ifndef RMT_D3D11_RESYNC_ON_DISJOINT +#define RMT_D3D11_RESYNC_ON_DISJOINT 1 +#endif + +// If RMT_USE_INTERNAL_HASH_FUNCTION is defined to 1, the internal hash function for strings is used. +// This is the default setting. +// If you set RMT_USE_INTERNAL_HASH_FUNCTION to 0, you must implement rmt_HashString32 yourself. +#ifndef RMT_USE_INTERNAL_HASH_FUNCTION +#define RMT_USE_INTERNAL_HASH_FUNCTION 1 +#endif + +// If RMT_USE_LEGACY_ATOMICS is defined to 1, the implementation will use the legacy fallback atomic functions +// The default setting is 0 +#ifndef RMT_USE_LEGACY_ATOMICS +#define RMT_USE_LEGACY_ATOMICS 0 +#endif + +/*-------------------------------------------------------------------------------------------------------------------------------- + Compiler/Platform Detection and Preprocessor Utilities +---------------------------------------------------------------------------------------------------------------------------------*/ + + +// Platform identification +#if defined(_WINDOWS) || defined(_WIN32) + #define RMT_PLATFORM_WINDOWS +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) + #define RMT_PLATFORM_LINUX + #define RMT_PLATFORM_POSIX +#elif defined(__APPLE__) + #define RMT_PLATFORM_MACOS + #define RMT_PLATFORM_POSIX +#endif + +// Architecture identification +#ifdef RMT_PLATFORM_WINDOWS +#if defined(_M_AMD64) || defined(__x86_64__) // MSVC defines _M_AMD64 and MinGW-64 defines __x86_64__ +#define RMT_ARCH_64BIT +#else +#define RMT_ARCH_32BIT +#endif +#endif + +#if __GNUC__ || __clang__ +#if __x86_64__ || __ppc64__ || __amd64__ || __arm64__ +#define RMT_ARCH_64BIT +#else +#define RMT_ARCH_32BIT +#endif +#endif + + +#ifdef RMT_DLL + #if defined (RMT_PLATFORM_WINDOWS) + #if defined (RMT_IMPL) + #define RMT_API __declspec(dllexport) + #else + #define RMT_API __declspec(dllimport) + #endif + #elif defined (RMT_PLATFORM_POSIX) + #if defined (RMT_IMPL) + #define RMT_API __attribute__((visibility("default"))) + #else + #define RMT_API + #endif + #endif +#else + #define RMT_API +#endif + +// Allows macros to be written that can work around the inability to do: #define(x) #ifdef x +// with the C preprocessor. 
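/* Editor's note -- an illustrative expansion, not part of the patch: the macros below encode
   a compile-time boolean in the macro *name* so the preprocessor can select between two token
   lists. With RMT_ENABLED defined to 0, for example:

       rmt_LogText("hello")
           -> RMT_OPTIONAL(RMT_ENABLED, _rmt_LogText("hello"))
           -> IFDEF_RMT_ENABLED(_rmt_LogText("hello"), )
           -> (empty)

   so a disabled build compiles the entire public API away to nothing. */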
+#if RMT_ENABLED + #define IFDEF_RMT_ENABLED(t, f) t +#else + #define IFDEF_RMT_ENABLED(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_CUDA + #define IFDEF_RMT_USE_CUDA(t, f) t +#else + #define IFDEF_RMT_USE_CUDA(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_D3D11 + #define IFDEF_RMT_USE_D3D11(t, f) t +#else + #define IFDEF_RMT_USE_D3D11(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_D3D12 + #define IFDEF_RMT_USE_D3D12(t, f) t +#else + #define IFDEF_RMT_USE_D3D12(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_OPENGL + #define IFDEF_RMT_USE_OPENGL(t, f) t +#else + #define IFDEF_RMT_USE_OPENGL(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_METAL + #define IFDEF_RMT_USE_METAL(t, f) t +#else + #define IFDEF_RMT_USE_METAL(t, f) f +#endif +#if RMT_ENABLED && RMT_USE_VULKAN + #define IFDEF_RMT_USE_VULKAN(t, f) t +#else + #define IFDEF_RMT_USE_VULKAN(t, f) f +#endif + + +// Public interface is written in terms of these macros to easily enable/disable itself +#define RMT_OPTIONAL(macro, x) IFDEF_ ## macro(x, ) +#define RMT_OPTIONAL_RET(macro, x, y) IFDEF_ ## macro(x, (y)) + + +/*-------------------------------------------------------------------------------------------------------------------------------- + Types +--------------------------------------------------------------------------------------------------------------------------------*/ + + +// Boolean +typedef unsigned int rmtBool; +#define RMT_TRUE ((rmtBool)1) +#define RMT_FALSE ((rmtBool)0) + +// Unsigned integer types +typedef unsigned char rmtU8; +typedef unsigned short rmtU16; +typedef unsigned int rmtU32; +typedef unsigned long long rmtU64; + +// Signed integer types +typedef char rmtS8; +typedef short rmtS16; +typedef int rmtS32; +typedef long long rmtS64; + +// Float types +typedef float rmtF32; +typedef double rmtF64; + +// Const, null-terminated string pointer +typedef const char* rmtPStr; + +// Opaque pointer for a sample graph tree +typedef struct Msg_SampleTree rmtSampleTree; + +// Opaque pointer to a node in the sample graph tree +typedef struct Sample rmtSample; + +// Handle to the main remotery instance +typedef struct Remotery Remotery; + +// Forward declaration +struct rmtProperty; + +typedef enum rmtSampleType +{ + RMT_SampleType_CPU, + RMT_SampleType_CUDA, + RMT_SampleType_D3D11, + RMT_SampleType_D3D12, + RMT_SampleType_OpenGL, + RMT_SampleType_Metal, + RMT_SampleType_Vulkan, + RMT_SampleType_Count, +} rmtSampleType; + +// All possible error codes +// clang-format off +typedef enum rmtError +{ + RMT_ERROR_NONE, + RMT_ERROR_RECURSIVE_SAMPLE, // Not an error but an internal message to calling code + RMT_ERROR_UNKNOWN, // An error with a message yet to be defined, only for internal error handling + RMT_ERROR_INVALID_INPUT, // An invalid input to a function call was provided + RMT_ERROR_RESOURCE_CREATE_FAIL, // Creation of an internal resource failed + RMT_ERROR_RESOURCE_ACCESS_FAIL, // Access of an internal resource failed + RMT_ERROR_TIMEOUT, // Internal system timeout + + // System errors + RMT_ERROR_MALLOC_FAIL, // Malloc call within remotery failed + RMT_ERROR_TLS_ALLOC_FAIL, // Attempt to allocate thread local storage failed + RMT_ERROR_VIRTUAL_MEMORY_BUFFER_FAIL, // Failed to create a virtual memory mirror buffer + RMT_ERROR_CREATE_THREAD_FAIL, // Failed to create a thread for the server + RMT_ERROR_OPEN_THREAD_HANDLE_FAIL, // Failed to open a thread handle, given a thread id + + // Network TCP/IP socket errors + RMT_ERROR_SOCKET_INVALID_POLL, // Poll attempt on an invalid socket + RMT_ERROR_SOCKET_SELECT_FAIL, // Server failed to 
call select on socket
+    RMT_ERROR_SOCKET_POLL_ERRORS,              // Poll notified that the socket has errors
+    RMT_ERROR_SOCKET_SEND_FAIL,                // Unrecoverable error occurred while client/server tried to send data
+    RMT_ERROR_SOCKET_RECV_NO_DATA,             // No data available when attempting a receive
+    RMT_ERROR_SOCKET_RECV_TIMEOUT,             // Timed out trying to receive data
+    RMT_ERROR_SOCKET_RECV_FAILED,              // Unrecoverable error occurred while client/server tried to receive data
+
+    // WebSocket errors
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_NOT_GET,     // WebSocket server handshake failed, not HTTP GET
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_VERSION,  // WebSocket server handshake failed, can't locate WebSocket version
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_VERSION, // WebSocket server handshake failed, unsupported WebSocket version
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_HOST,     // WebSocket server handshake failed, can't locate host
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_HOST,    // WebSocket server handshake failed, host is not allowed to connect
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_NO_KEY,      // WebSocket server handshake failed, can't locate WebSocket key
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_BAD_KEY,     // WebSocket server handshake failed, WebSocket key is ill-formed
+    RMT_ERROR_WEBSOCKET_HANDSHAKE_STRING_FAIL, // WebSocket server handshake failed, internal error, bad string code
+    RMT_ERROR_WEBSOCKET_DISCONNECTED,          // WebSocket server received a disconnect request and closed the socket
+    RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER,      // Couldn't parse WebSocket frame header
+    RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER_SIZE, // Partially received wide frame header size
+    RMT_ERROR_WEBSOCKET_BAD_FRAME_HEADER_MASK, // Partially received frame header data mask
+    RMT_ERROR_WEBSOCKET_RECEIVE_TIMEOUT,       // Timeout receiving frame header
+
+    RMT_ERROR_REMOTERY_NOT_CREATED,            // Remotery object has not been created
+    RMT_ERROR_SEND_ON_INCOMPLETE_PROFILE,      // An attempt was made to send an incomplete profile tree to the client
+
+    // CUDA error messages
+    RMT_ERROR_CUDA_DEINITIALIZED,      // This indicates that the CUDA driver is in the process of shutting down
+    RMT_ERROR_CUDA_NOT_INITIALIZED,    // This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has failed
+    RMT_ERROR_CUDA_INVALID_CONTEXT,    // This most frequently indicates that there is no context bound to the current thread
+    RMT_ERROR_CUDA_INVALID_VALUE,      // This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values
+    RMT_ERROR_CUDA_INVALID_HANDLE,     // This indicates that a resource handle passed to the API call was not valid
+    RMT_ERROR_CUDA_OUT_OF_MEMORY,      // The API call failed because it was unable to allocate enough memory to perform the requested operation
+    RMT_ERROR_ERROR_NOT_READY,         // This indicates that a previously issued asynchronous operation has not completed yet
+
+    // Direct3D 11 error messages
+    RMT_ERROR_D3D11_FAILED_TO_CREATE_QUERY,    // Failed to create query for sample
+
+    // OpenGL error messages
+    RMT_ERROR_OPENGL_ERROR,    // Generic OpenGL error, no need to expose detail since app will need an OpenGL error callback registered
+
+    RMT_ERROR_CUDA_UNKNOWN,
+} rmtError;
+// clang-format on
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    // Gets the last error message issued on the calling thread
+    RMT_API rmtPStr rmt_GetLastErrorMessage();
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   Runtime Settings
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+// Callback function pointer types
+typedef void* (*rmtMallocPtr)(void* mm_context, rmtU32 size);
+typedef void* (*rmtReallocPtr)(void* mm_context, void* ptr, rmtU32 size);
+typedef void (*rmtFreePtr)(void* mm_context, void* ptr);
+typedef void (*rmtInputHandlerPtr)(const char* text, void* context);
+typedef void (*rmtSampleTreeHandlerPtr)(void* cbk_context, rmtSampleTree* sample_tree);
+typedef void (*rmtPropertyHandlerPtr)(void* cbk_context, struct rmtProperty* root);
+
+// Structure to fill in to modify Remotery default settings
+typedef struct rmtSettings
+{
+    // Which port to listen for incoming connections on
+    rmtU16 port;
+
+    // When this server exits it can leave the port open in TIME_WAIT state for a while. This forces
+    // subsequent server bind attempts to fail when restarting. If you find restarts fail repeatedly
+    // with bind attempts, set this to true to forcibly reuse the open port.
+    rmtBool reuse_open_port;
+
+    // Only allow connections on localhost?
+    // For dev builds you may want to access your game from other devices but if
+    // you distribute a game to your players with Remotery active, it's probably best
+    // to limit connections to localhost.
+    rmtBool limit_connections_to_localhost;
+
+    // Whether to enable runtime thread sampling that discovers which processors a thread is running
+    // on. This will repeatedly suspend and resume threads from outside and inject code into each
+    // thread that automatically instruments the processor.
+    // Default: Enabled
+    rmtBool enableThreadSampler;
+
+    // How long to sleep between server updates, hopefully trying to give
+    // a little CPU back to other threads.
+    rmtU32 msSleepBetweenServerUpdates;
+
+    // Size of the internal message queues Remotery uses
+    // Will be rounded to page granularity of 64k
+    rmtU32 messageQueueSizeInBytes;
+
+    // If the user continuously pushes to the message queue, the server network
+    // code won't get a chance to update unless there's an upper limit on how
+    // many messages can be consumed per loop.
+    rmtU32 maxNbMessagesPerUpdate;
+
+    // Callback pointers for memory allocation
+    rmtMallocPtr malloc;
+    rmtReallocPtr realloc;
+    rmtFreePtr free;
+    void* mm_context;
+
+    // Callback pointer for receiving input from the Remotery console
+    rmtInputHandlerPtr input_handler;
+
+    // Callback pointer for traversing the sample tree graph
+    rmtSampleTreeHandlerPtr sampletree_handler;
+    void* sampletree_context;
+
+    // Callback pointer for traversing the property graph
+    rmtPropertyHandlerPtr snapshot_callback;
+    void* snapshot_context;
+
+    // Context pointer that gets sent to the Remotery console callback function
+    void* input_handler_context;
+
+    rmtPStr logPath;
+} rmtSettings;
+
+// Retrieve and configure the global rmtSettings object; returns `rmtSettings*`.
+// This can be done before or after Remotery is initialised; however, some fields are only referenced on initialisation.
+#define rmt_Settings() \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_Settings(), NULL )
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   Initialisation/Shutdown
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+// Can call remotery functions on a null pointer
+// TODO: Can embed extern "C" in these macros?
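/* Editor's note -- an illustrative usage sketch, not part of the patch (the port value is
   arbitrary): settings should be adjusted before creating the instance, since some fields
   are only read during initialisation:

       rmtSettings* settings = rmt_Settings();
       settings->port = 17815;
       settings->reuse_open_port = RMT_TRUE;

       Remotery* rmt;
       rmt_CreateGlobalInstance(&rmt);   // declared just below
*/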
+
+// Initialises Remotery and sets its internal global instance pointer.
+// Parameter is `Remotery**`, returning you the pointer for further use.
+#define rmt_CreateGlobalInstance(rmt) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_CreateGlobalInstance(rmt), RMT_ERROR_NONE)
+
+// Shuts down Remotery, requiring its pointer to be passed to ensure you are destroying the correct instance.
+#define rmt_DestroyGlobalInstance(rmt) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_DestroyGlobalInstance(rmt))
+
+// For use in the presence of DLLs/SOs if each of them is linking Remotery statically.
+// If Remotery is hosted in its own DLL and linked dynamically then there is no need to use this.
+// Otherwise, pass the result of `rmt_CreateGlobalInstance` from your main DLL to this in your other DLLs.
+#define rmt_SetGlobalInstance(rmt) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_SetGlobalInstance(rmt))
+
+// Get a pointer to the current global Remotery instance.
+#define rmt_GetGlobalInstance() \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_GetGlobalInstance(), NULL)
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   CPU Sampling
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+#define rmt_SetCurrentThreadName(rmt) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_SetCurrentThreadName(rmt))
+
+#define rmt_LogText(text) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_LogText(text))
+
+#define rmt_BeginCPUSample(name, flags) \
+    RMT_OPTIONAL(RMT_ENABLED, { \
+        static rmtU32 rmt_sample_hash_##name = 0; \
+        _rmt_BeginCPUSample(#name, flags, &rmt_sample_hash_##name); \
+    })
+
+#define rmt_BeginCPUSampleDynamic(namestr, flags) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_BeginCPUSample(namestr, flags, NULL))
+
+#define rmt_EndCPUSample() \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_EndCPUSample())
+
+// Used for both CPU and GPU profiling
+// Essential to call this every frame, ever since D3D12/Vulkan support was added
+// D3D12/Vulkan Requirements: Don't sample any command lists that begin before this call and end after it
+#define rmt_MarkFrame() \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_MarkFrame(), RMT_ERROR_NONE)
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   GPU Sampling
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+// Structure to fill in when binding CUDA to Remotery
+typedef struct rmtCUDABind
+{
+    // The main context that all driver functions apply before each call
+    void* context;
+
+    // Driver API function pointers that need to be pointed to
+    // Untyped so that the CUDA headers are not required in this file
+    // NOTE: These are named differently to the CUDA functions because the CUDA API has a habit of using
+    // macros to point function calls to different versions, e.g. cuEventDestroy is a macro for
+    // cuEventDestroy_v2.
+ void* CtxSetCurrent; + void* CtxGetCurrent; + void* EventCreate; + void* EventDestroy; + void* EventRecord; + void* EventQuery; + void* EventElapsedTime; + +} rmtCUDABind; + +// Call once after you've initialised CUDA to bind it to Remotery +#define rmt_BindCUDA(bind) \ + RMT_OPTIONAL(RMT_USE_CUDA, _rmt_BindCUDA(bind)) + +// Mark the beginning of a CUDA sample on the specified asynchronous stream +#define rmt_BeginCUDASample(name, stream) \ + RMT_OPTIONAL(RMT_USE_CUDA, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginCUDASample(#name, &rmt_sample_hash_##name, stream); \ + }) + +// Mark the end of a CUDA sample on the specified asynchronous stream +#define rmt_EndCUDASample(stream) \ + RMT_OPTIONAL(RMT_USE_CUDA, _rmt_EndCUDASample(stream)) + + +#define rmt_BindD3D11(device, context) \ + RMT_OPTIONAL(RMT_USE_D3D11, _rmt_BindD3D11(device, context)) + +#define rmt_UnbindD3D11() \ + RMT_OPTIONAL(RMT_USE_D3D11, _rmt_UnbindD3D11()) + +#define rmt_BeginD3D11Sample(name) \ + RMT_OPTIONAL(RMT_USE_D3D11, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginD3D11Sample(#name, &rmt_sample_hash_##name); \ + }) + +#define rmt_BeginD3D11SampleDynamic(namestr) \ + RMT_OPTIONAL(RMT_USE_D3D11, _rmt_BeginD3D11Sample(namestr, NULL)) + +#define rmt_EndD3D11Sample() \ + RMT_OPTIONAL(RMT_USE_D3D11, _rmt_EndD3D11Sample()) + + +typedef struct rmtD3D12Bind +{ + // The main device shared by all threads + void* device; + + // The queue command lists are executed on for profiling + void* queue; + +} rmtD3D12Bind; + +// Create a D3D12 binding for the given device/queue pair +#define rmt_BindD3D12(device, queue, out_bind) \ + RMT_OPTIONAL_RET(RMT_USE_D3D12, _rmt_BindD3D12(device, queue, out_bind), NULL) + +#define rmt_UnbindD3D12(bind) \ + RMT_OPTIONAL(RMT_USE_D3D12, _rmt_UnbindD3D12(bind)) + +#define rmt_BeginD3D12Sample(bind, command_list, name) \ + RMT_OPTIONAL(RMT_USE_D3D12, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginD3D12Sample(bind, command_list, #name, &rmt_sample_hash_##name); \ + }) + +#define rmt_BeginD3D12SampleDynamic(bind, command_list, namestr) \ + RMT_OPTIONAL(RMT_USE_D3D12, _rmt_BeginD3D12Sample(bind, command_list, namestr, NULL)) + +#define rmt_EndD3D12Sample() \ + RMT_OPTIONAL(RMT_USE_D3D12, _rmt_EndD3D12Sample()) + + +#define rmt_BindOpenGL() \ + RMT_OPTIONAL(RMT_USE_OPENGL, _rmt_BindOpenGL()) + +#define rmt_UnbindOpenGL() \ + RMT_OPTIONAL(RMT_USE_OPENGL, _rmt_UnbindOpenGL()) + +#define rmt_BeginOpenGLSample(name) \ + RMT_OPTIONAL(RMT_USE_OPENGL, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginOpenGLSample(#name, &rmt_sample_hash_##name); \ + }) + +#define rmt_BeginOpenGLSampleDynamic(namestr) \ + RMT_OPTIONAL(RMT_USE_OPENGL, _rmt_BeginOpenGLSample(namestr, NULL)) + +#define rmt_EndOpenGLSample() \ + RMT_OPTIONAL(RMT_USE_OPENGL, _rmt_EndOpenGLSample()) + + +#define rmt_BindMetal(command_buffer) \ + RMT_OPTIONAL(RMT_USE_METAL, _rmt_BindMetal(command_buffer)); + +#define rmt_UnbindMetal() \ + RMT_OPTIONAL(RMT_USE_METAL, _rmt_UnbindMetal()); + +#define rmt_BeginMetalSample(name) \ + RMT_OPTIONAL(RMT_USE_METAL, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginMetalSample(#name, &rmt_sample_hash_##name); \ + }) + +#define rmt_BeginMetalSampleDynamic(namestr) \ + RMT_OPTIONAL(RMT_USE_METAL, _rmt_BeginMetalSample(namestr, NULL)) + +#define rmt_EndMetalSample() \ + RMT_OPTIONAL(RMT_USE_METAL, _rmt_EndMetalSample()) + + +typedef struct rmtVulkanFunctions +{ + // Function pointers to Vulkan functions + // Untyped so that the Vulkan headers are 
not required in this file + + // Instance functions + void* vkGetPhysicalDeviceProperties; + + // Device functions + void* vkQueueSubmit; + void* vkQueueWaitIdle; + void* vkCreateQueryPool; + void* vkDestroyQueryPool; + void* vkResetQueryPool; // vkResetQueryPool (Vulkan 1.2+ with hostQueryReset) or vkResetQueryPoolEXT (VK_EXT_host_query_reset) + void* vkGetQueryPoolResults; + void* vkCmdWriteTimestamp; + void* vkCreateSemaphore; + void* vkDestroySemaphore; + void* vkSignalSemaphore; // vkSignalSemaphore (Vulkan 1.2+ with timelineSemaphore) or vkSignalSemaphoreKHR (VK_KHR_timeline_semaphore) + void* vkGetSemaphoreCounterValue; // vkGetSemaphoreCounterValue (Vulkan 1.2+ with timelineSemaphore) or vkGetSemaphoreCounterValueKHR (VK_KHR_timeline_semaphore) + void* vkGetCalibratedTimestampsEXT; // vkGetCalibratedTimestampsKHR (VK_KHR_calibrated_timestamps) or vkGetCalibratedTimestampsEXT (VK_EXT_calibrated_timestamps) + +} rmtVulkanFunctions; + +typedef struct rmtVulkanBind +{ + // The physical Vulkan device, of type VkPhysicalDevice + void* physical_device; + + // The logical Vulkan device, of type VkDevice + void* device; + + // The queue command buffers are executed on for profiling, of type VkQueue + void* queue; + +} rmtVulkanBind; + +// Create a Vulkan binding for the given device/queue pair +#define rmt_BindVulkan(instance, physical_device, device, queue, funcs, out_bind) \ + RMT_OPTIONAL_RET(RMT_USE_VULKAN, _rmt_BindVulkan(instance, physical_device, device, queue, funcs, out_bind), RMT_ERROR_NONE) + +#define rmt_UnbindVulkan(bind) \ + RMT_OPTIONAL(RMT_USE_VULKAN, _rmt_UnbindVulkan(bind)) + +#define rmt_BeginVulkanSample(bind, command_buffer, name) \ + RMT_OPTIONAL(RMT_USE_VULKAN, { \ + static rmtU32 rmt_sample_hash_##name = 0; \ + _rmt_BeginVulkanSample(bind, command_buffer, #name, &rmt_sample_hash_##name); \ + }) + +#define rmt_BeginVulkanSampleDynamic(bind, command_buffer, namestr) \ + RMT_OPTIONAL(RMT_USE_VULKAN, _rmt_BeginVulkanSample(bind, command_buffer, namestr, NULL)) + +#define rmt_EndVulkanSample() \ + RMT_OPTIONAL(RMT_USE_VULKAN, _rmt_EndVulkanSample()) + + +/*-------------------------------------------------------------------------------------------------------------------------------- + Runtime Properties +--------------------------------------------------------------------------------------------------------------------------------*/ + + +/* --- Public API --------------------------------------------------------------------------------------------------------------*/ + + +// Flags that control property behaviour +typedef enum +{ + RMT_PropertyFlags_NoFlags = 0, + + // Reset property back to its default value on each new frame + RMT_PropertyFlags_FrameReset = 1, +} rmtPropertyFlags; + +// All possible property types that can be recorded and sent to the viewer +typedef enum +{ + RMT_PropertyType_rmtGroup, + RMT_PropertyType_rmtBool, + RMT_PropertyType_rmtS32, + RMT_PropertyType_rmtU32, + RMT_PropertyType_rmtF32, + RMT_PropertyType_rmtS64, + RMT_PropertyType_rmtU64, + RMT_PropertyType_rmtF64, +} rmtPropertyType; + +// A property value as a union of all its possible types +typedef union rmtPropertyValue +{ + // C++ requires function-based construction of property values because it has no designated initialiser support until C++20 + #ifdef __cplusplus + // These are static Make calls, rather than overloaded constructors, because `rmtBool` is the same type as `rmtU32` + static rmtPropertyValue MakeBool(rmtBool v) { rmtPropertyValue pv; pv.Bool = v; return pv; } + static 
rmtPropertyValue MakeS32(rmtS32 v) { rmtPropertyValue pv; pv.S32 = v; return pv; } + static rmtPropertyValue MakeU32(rmtU32 v) { rmtPropertyValue pv; pv.U32 = v; return pv; } + static rmtPropertyValue MakeF32(rmtF32 v) { rmtPropertyValue pv; pv.F32 = v; return pv; } + static rmtPropertyValue MakeS64(rmtS64 v) { rmtPropertyValue pv; pv.S64 = v; return pv; } + static rmtPropertyValue MakeU64(rmtU64 v) { rmtPropertyValue pv; pv.U64 = v; return pv; } + static rmtPropertyValue MakeF64(rmtF64 v) { rmtPropertyValue pv; pv.F64 = v; return pv; } + #endif + + rmtBool Bool; + rmtS32 S32; + rmtU32 U32; + rmtF32 F32; + rmtS64 S64; + rmtU64 U64; + rmtF64 F64; +} rmtPropertyValue; + +// Definition of a property that should be stored globally +// Note: +// Use the callback api and the rmt_PropertyGetxxx accessors to traverse this structure +typedef struct rmtProperty +{ + // Gets set to RMT_TRUE after a property has been modified, when it gets initialised for the first time + rmtBool initialised; + + // Runtime description + rmtPropertyType type; + rmtPropertyFlags flags; + + // Current value + rmtPropertyValue value; + + // Last frame value to see if previous value needs to be updated + rmtPropertyValue lastFrameValue; + + // Previous value only if it's different from the current value, and when it changed + rmtPropertyValue prevValue; + rmtU32 prevValueFrame; + + // Text description + const char* name; + const char* description; + + // Default value for Reset calls + rmtPropertyValue defaultValue; + + // Parent link specifically placed after default value so that variadic macro can initialise it + struct rmtProperty* parent; + + // Links within the property tree + struct rmtProperty* firstChild; + struct rmtProperty* lastChild; + struct rmtProperty* nextSibling; + + // Hash for efficient sending of properties to the viewer + rmtU32 nameHash; + + // Unique, persistent ID among all properties + rmtU32 uniqueID; +} rmtProperty; + +// Define properties of different types at global scope: +// +// * Never define properties in a header file that gets included multiple times. +// * The property gets defined exactly as `name` in the global scope. +// * `flag` is specified without the `RMT_PropertyFlags_` prefix. +// * Property parents are optional and can be specified as the last parameter, referencing `&name`. +// +#define rmt_PropertyDefine_Group(name, desc, ...) _rmt_PropertyDefine(rmtGroup, name, _rmt_MakePropertyValue(Bool, 0), RMT_PropertyFlags_NoFlags, desc, __VA_ARGS__) +#define rmt_PropertyDefine_Bool(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtBool, name, _rmt_MakePropertyValue(Bool, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_S32(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtS32, name, _rmt_MakePropertyValue(S32, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_U32(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtU32, name, _rmt_MakePropertyValue(U32, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_F32(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtF32, name, _rmt_MakePropertyValue(F32, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_S64(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtS64, name, _rmt_MakePropertyValue(S64, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_U64(name, default_value, flag, desc, ...) 
_rmt_PropertyDefine(rmtU64, name, _rmt_MakePropertyValue(U64, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) +#define rmt_PropertyDefine_F64(name, default_value, flag, desc, ...) _rmt_PropertyDefine(rmtF64, name, _rmt_MakePropertyValue(F64, default_value), RMT_PropertyFlags_##flag, desc, __VA_ARGS__) + +// As properties need to be defined at global scope outside header files, use this to declare properties in header files to be +// modified in other translation units. +// +// If you don't want to include Remotery.h in your shared header you can forward declare the `rmtProperty` type and then forward +// declare the property name yourself. +#define rmt_PropertyExtern(name) extern rmtProperty name; + +// Set properties to the given value +#define rmt_PropertySet_Bool(name, set_value) _rmt_PropertySet(Bool, name, set_value) +#define rmt_PropertySet_S32(name, set_value) _rmt_PropertySet(S32, name, set_value) +#define rmt_PropertySet_U32(name, set_value) _rmt_PropertySet(U32, name, set_value) +#define rmt_PropertySet_F32(name, set_value) _rmt_PropertySet(F32, name, set_value) +#define rmt_PropertySet_S64(name, set_value) _rmt_PropertySet(S64, name, set_value) +#define rmt_PropertySet_U64(name, set_value) _rmt_PropertySet(U64, name, set_value) +#define rmt_PropertySet_F64(name, set_value) _rmt_PropertySet(F64, name, set_value) + +// Add the given value to properties +#define rmt_PropertyAdd_S32(name, add_value) _rmt_PropertyAdd(S32, name, add_value) +#define rmt_PropertyAdd_U32(name, add_value) _rmt_PropertyAdd(U32, name, add_value) +#define rmt_PropertyAdd_F32(name, add_value) _rmt_PropertyAdd(F32, name, add_value) +#define rmt_PropertyAdd_S64(name, add_value) _rmt_PropertyAdd(S64, name, add_value) +#define rmt_PropertyAdd_U64(name, add_value) _rmt_PropertyAdd(U64, name, add_value) +#define rmt_PropertyAdd_F64(name, add_value) _rmt_PropertyAdd(F64, name, add_value) + +// Reset properties to their default value +#define rmt_PropertyReset(name) \ + { \ + name.value = name.defaultValue; \ + _rmt_PropertySetValue(&name); \ + } + +// Send all properties and their values to the viewer and log to file +#define rmt_PropertySnapshotAll() _rmt_PropertySnapshotAll() + +// Reset all RMT_PropertyFlags_FrameReset properties to their default value +#define rmt_PropertyFrameResetAll() _rmt_PropertyFrameResetAll() + +/* --- Private Details ---------------------------------------------------------------------------------------------------------*/ + + +// Used to define properties from typed macro callers +#define _rmt_PropertyDefine(type, name, default_value, flags, desc, ...) 
\
+    rmtProperty name = { RMT_FALSE, RMT_PropertyType_##type, flags, default_value, default_value, default_value, 0, #name, desc, default_value, __VA_ARGS__ };
+
+// C++ doesn't support designated initialisers until C++20
+// Worth checking for C++20 designated initialiser support here to remove the function call in debug builds
+#ifdef __cplusplus
+#define _rmt_MakePropertyValue(field, value) rmtPropertyValue::Make##field(value)
+#else
+#define _rmt_MakePropertyValue(field, value) { .field = value }
+#endif
+
+// Used to set properties from typed macro callers
+#define _rmt_PropertySet(field, name, set_value) \
+    { \
+        name.value.field = set_value; \
+        _rmt_PropertySetValue(&name); \
+    }
+
+// Used to add properties from typed macro callers
+#define _rmt_PropertyAdd(field, name, add_value) \
+    { \
+        name.value.field += add_value; \
+        rmtPropertyValue delta_value = _rmt_MakePropertyValue(field, add_value); \
+        _rmt_PropertyAddValue(&name, delta_value); \
+    }
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+RMT_API void _rmt_PropertySetValue(rmtProperty* property);
+RMT_API void _rmt_PropertyAddValue(rmtProperty* property, rmtPropertyValue add_value);
+RMT_API rmtError _rmt_PropertySnapshotAll();
+RMT_API void _rmt_PropertyFrameResetAll();
+RMT_API rmtU32 _rmt_HashString32(const char* s, int len, rmtU32 seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   Sample Tree API for walking `rmtSampleTree` objects in the Sample Tree Handler.
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+typedef enum rmtSampleFlags
+{
+    // Default behaviour
+    RMTSF_None = 0,
+
+    // Search parent for same-named samples and merge timing instead of adding a new sample
+    RMTSF_Aggregate = 1,
+
+    // Merge sample with parent if it's the same sample
+    RMTSF_Recursive = 2,
+
+    // Set this flag on any of your root samples so that Remotery will assert if it ends up *not* being the root sample.
+    // This will quickly allow you to detect Begin/End mismatches causing a sample tree imbalance.
+    RMTSF_Root = 4,
+
+    // Mainly for platforms other than Windows that don't support the thread sampler and can't detect stalling samples:
+    // where you have a non-root sample that stays open indefinitely and never sends its contents to the log/viewer,
+    // this flag sends the sample to the log/viewer when it closes.
+    // You cannot have more than one sample open with this flag on the same thread at a time.
+    // This flag will be removed in a future version when all platforms support stalling samples.
+    RMTSF_SendOnClose = 8,
+} rmtSampleFlags;
+
+// Struct to hold iterator info
+typedef struct rmtSampleIterator
+{
+// public
+    rmtSample* sample;
+// private
+    rmtSample* initial;
+} rmtSampleIterator;
+
+#define rmt_IterateChildren(iter, sample) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_IterateChildren(iter, sample))
+
+#define rmt_IterateNext(iter) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_IterateNext(iter), RMT_FALSE)
+
+#define rmt_SampleTreeGetThreadName(sample_tree) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleTreeGetThreadName(sample_tree), NULL)
+
+#define rmt_SampleTreeGetRootSample(sample_tree) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleTreeGetRootSample(sample_tree), NULL)
+
+// Should only be called from within the sample tree callback,
+// when the internal string lookup table is valid (i.e. on the main Remotery thread)
+#define rmt_SampleGetName(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetName(sample), NULL)
+
+#define rmt_SampleGetNameHash(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetNameHash(sample), 0U)
+
+#define rmt_SampleGetCallCount(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetCallCount(sample), 0U)
+
+#define rmt_SampleGetStart(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetStart(sample), 0LLU)
+
+#define rmt_SampleGetTime(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetTime(sample), 0LLU)
+
+#define rmt_SampleGetSelfTime(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetSelfTime(sample), 0LLU)
+
+#define rmt_SampleGetColour(sample, r, g, b) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_SampleGetColour(sample, r, g, b))
+
+#define rmt_SampleGetType(sample) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_SampleGetType(sample), RMT_SampleType_Count)
+
+
+// Struct to hold iterator info
+typedef struct rmtPropertyIterator
+{
+// public
+    rmtProperty* property;
+// private
+    rmtProperty* initial;
+} rmtPropertyIterator;
+
+#define rmt_PropertyIterateChildren(iter, property) \
+    RMT_OPTIONAL(RMT_ENABLED, _rmt_PropertyIterateChildren(iter, property))
+
+#define rmt_PropertyIterateNext(iter) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_PropertyIterateNext(iter), RMT_FALSE)
+
+// Should only be called from within the property callback,
+// when the internal string lookup table is valid (i.e. on the main Remotery thread)
+
+#define rmt_PropertyGetType(property) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_PropertyGetType(property), RMT_PropertyType_Count)
+
+#define rmt_PropertyGetName(property) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_PropertyGetName(property), NULL)
+
+#define rmt_PropertyGetDescription(property) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_PropertyGetDescription(property), 0U)
+
+#define rmt_PropertyGetValue(property) \
+    RMT_OPTIONAL_RET(RMT_ENABLED, _rmt_PropertyGetValue(property), 0U)
+
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   C++ Public Interface Extensions
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+#ifdef __cplusplus
+
+
+#if RMT_ENABLED
+
+// Types that end samples in their destructors
+extern "C" RMT_API void _rmt_EndCPUSample(void);
+struct rmt_EndCPUSampleOnScopeExit
+{
+    ~rmt_EndCPUSampleOnScopeExit()
+    {
+        _rmt_EndCPUSample();
+    }
+};
+
+#if RMT_USE_CUDA
+extern "C" RMT_API void _rmt_EndCUDASample(void* stream);
+struct rmt_EndCUDASampleOnScopeExit
+{
+    rmt_EndCUDASampleOnScopeExit(void* stream) : stream(stream)
+    {
+    }
+    ~rmt_EndCUDASampleOnScopeExit()
+    {
+        _rmt_EndCUDASample(stream);
+    }
+    void* stream;
+};
+
+#endif
+#if RMT_USE_D3D11
+extern "C" RMT_API void _rmt_EndD3D11Sample(void);
+struct rmt_EndD3D11SampleOnScopeExit
+{
+    ~rmt_EndD3D11SampleOnScopeExit()
+    {
+        _rmt_EndD3D11Sample();
+    }
+};
+#endif
+
+#if RMT_USE_D3D12
+extern "C" RMT_API void _rmt_EndD3D12Sample();
+struct rmt_EndD3D12SampleOnScopeExit
+{
+    ~rmt_EndD3D12SampleOnScopeExit()
+    {
+        _rmt_EndD3D12Sample();
+    }
+};
+#endif
+
+#if RMT_USE_OPENGL
+extern "C" RMT_API void _rmt_EndOpenGLSample(void);
+struct rmt_EndOpenGLSampleOnScopeExit
+{
+    ~rmt_EndOpenGLSampleOnScopeExit()
+    {
+        _rmt_EndOpenGLSample();
+    }
+};
+#endif
+
+#if RMT_USE_METAL
+extern "C" RMT_API void _rmt_EndMetalSample(void);
+struct rmt_EndMetalSampleOnScopeExit
+{
+    ~rmt_EndMetalSampleOnScopeExit()
+    {
+        _rmt_EndMetalSample();
+    }
+};
+#endif
+
+#if RMT_USE_VULKAN
+extern "C" RMT_API void _rmt_EndVulkanSample();
+struct rmt_EndVulkanSampleOnScopeExit
+{
+    ~rmt_EndVulkanSampleOnScopeExit()
+    {
+        _rmt_EndVulkanSample();
+    }
+};
+#endif
+
+#endif
+
+
+// Pairs a call to rmt_Begin<TYPE>Sample with its call to rmt_End<TYPE>Sample when leaving scope
+#define rmt_ScopedCPUSample(name, flags) \
+    RMT_OPTIONAL(RMT_ENABLED, rmt_BeginCPUSample(name, flags)); \
+    RMT_OPTIONAL(RMT_ENABLED, rmt_EndCPUSampleOnScopeExit rmt_ScopedCPUSample##name);
+#define rmt_ScopedCUDASample(name, stream) \
+    RMT_OPTIONAL(RMT_USE_CUDA, rmt_BeginCUDASample(name, stream)); \
+    RMT_OPTIONAL(RMT_USE_CUDA, rmt_EndCUDASampleOnScopeExit rmt_ScopedCUDASample##name(stream));
+#define rmt_ScopedD3D11Sample(name) \
+    RMT_OPTIONAL(RMT_USE_D3D11, rmt_BeginD3D11Sample(name)); \
+    RMT_OPTIONAL(RMT_USE_D3D11, rmt_EndD3D11SampleOnScopeExit rmt_ScopedD3D11Sample##name);
+#define rmt_ScopedD3D12Sample(bind, command_list, name) \
+    RMT_OPTIONAL(RMT_USE_D3D12, rmt_BeginD3D12Sample(bind, command_list, name)); \
+    RMT_OPTIONAL(RMT_USE_D3D12, rmt_EndD3D12SampleOnScopeExit rmt_ScopedD3D12Sample##name);
+#define rmt_ScopedOpenGLSample(name) \
+    RMT_OPTIONAL(RMT_USE_OPENGL, rmt_BeginOpenGLSample(name)); \
+    RMT_OPTIONAL(RMT_USE_OPENGL, rmt_EndOpenGLSampleOnScopeExit rmt_ScopedOpenGLSample##name);
+#define rmt_ScopedMetalSample(name) \
+    RMT_OPTIONAL(RMT_USE_METAL, rmt_BeginMetalSample(name)); \
+    RMT_OPTIONAL(RMT_USE_METAL, rmt_EndMetalSampleOnScopeExit rmt_ScopedMetalSample##name);
+#define rmt_ScopedVulkanSample(bind, command_buffer, name) \
+    RMT_OPTIONAL(RMT_USE_VULKAN, rmt_BeginVulkanSample(bind, command_buffer, name)); \
+    RMT_OPTIONAL(RMT_USE_VULKAN, rmt_EndVulkanSampleOnScopeExit rmt_ScopedVulkanSample##name);
+
+#endif
+
+
+/*--------------------------------------------------------------------------------------------------------------------------------
+   Private Interface - don't directly call these
+--------------------------------------------------------------------------------------------------------------------------------*/
+
+
+#if RMT_ENABLED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+RMT_API rmtSettings* _rmt_Settings( void );
+RMT_API enum rmtError _rmt_CreateGlobalInstance(Remotery** remotery);
+RMT_API void _rmt_DestroyGlobalInstance(Remotery* remotery);
+RMT_API void _rmt_SetGlobalInstance(Remotery* remotery);
+RMT_API Remotery* _rmt_GetGlobalInstance(void);
+RMT_API void _rmt_SetCurrentThreadName(rmtPStr thread_name);
+RMT_API void _rmt_LogText(rmtPStr text);
+RMT_API void _rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache);
+RMT_API void _rmt_EndCPUSample(void);
+RMT_API rmtError _rmt_MarkFrame(void);
+
+#if RMT_USE_CUDA
+RMT_API void _rmt_BindCUDA(const rmtCUDABind* bind);
+RMT_API void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream);
+RMT_API void _rmt_EndCUDASample(void* stream);
+#endif
+
+#if RMT_USE_D3D11
+RMT_API void _rmt_BindD3D11(void* device, void* context);
+RMT_API void _rmt_UnbindD3D11(void);
+RMT_API void _rmt_BeginD3D11Sample(rmtPStr name, rmtU32* hash_cache);
+RMT_API void _rmt_EndD3D11Sample(void);
+#endif
+
+#if RMT_USE_D3D12
+RMT_API rmtError _rmt_BindD3D12(void* device, void* queue, rmtD3D12Bind** out_bind);
+RMT_API void _rmt_UnbindD3D12(rmtD3D12Bind* bind);
+RMT_API void _rmt_BeginD3D12Sample(rmtD3D12Bind* bind, void* command_list, rmtPStr name, rmtU32* hash_cache);
+RMT_API void _rmt_EndD3D12Sample();
+#endif
+
+#if RMT_USE_OPENGL
+RMT_API void _rmt_BindOpenGL();
+RMT_API
void _rmt_UnbindOpenGL(void); +RMT_API void _rmt_BeginOpenGLSample(rmtPStr name, rmtU32* hash_cache); +RMT_API void _rmt_EndOpenGLSample(void); +#endif + +#if RMT_USE_METAL +RMT_API rmtError _rmt_BeginMetalSample(rmtPStr name, rmtU32* hash_cache); +RMT_API void _rmt_EndMetalSample(void); +#endif + +#if RMT_USE_VULKAN +RMT_API rmtError _rmt_BindVulkan(void* instance, void* physical_device, void* device, void* queue, const rmtVulkanFunctions* funcs, rmtVulkanBind** out_bind); +RMT_API void _rmt_UnbindVulkan(rmtVulkanBind* bind); +RMT_API void _rmt_BeginVulkanSample(rmtVulkanBind* bind, void* command_buffer, rmtPStr name, rmtU32* hash_cache); +RMT_API void _rmt_EndVulkanSample(); +#endif + +// Sample iterator +RMT_API void _rmt_IterateChildren(rmtSampleIterator* iter, rmtSample* sample); +RMT_API rmtBool _rmt_IterateNext(rmtSampleIterator* iter); + +// SampleTree accessors +RMT_API const char* _rmt_SampleTreeGetThreadName(rmtSampleTree* sample_tree); +RMT_API rmtSample* _rmt_SampleTreeGetRootSample(rmtSampleTree* sample_tree); + +// Sample accessors +RMT_API const char* _rmt_SampleGetName(rmtSample* sample); +RMT_API rmtU32 _rmt_SampleGetNameHash(rmtSample* sample); +RMT_API rmtU32 _rmt_SampleGetCallCount(rmtSample* sample); +RMT_API rmtU64 _rmt_SampleGetStart(rmtSample* sample); +RMT_API rmtU64 _rmt_SampleGetTime(rmtSample* sample); +RMT_API rmtU64 _rmt_SampleGetSelfTime(rmtSample* sample); +RMT_API void _rmt_SampleGetColour(rmtSample* sample, rmtU8* r, rmtU8* g, rmtU8* b); +RMT_API rmtSampleType _rmt_SampleGetType(rmtSample* sample); + +// Property iterator +RMT_API void _rmt_PropertyIterateChildren(rmtPropertyIterator* iter, rmtProperty* property); +RMT_API rmtBool _rmt_PropertyIterateNext(rmtPropertyIterator* iter); + +// Property accessors +RMT_API rmtPropertyType _rmt_PropertyGetType(rmtProperty* property); +RMT_API rmtU32 _rmt_PropertyGetNameHash(rmtProperty* property); +RMT_API const char* _rmt_PropertyGetName(rmtProperty* property); +RMT_API const char* _rmt_PropertyGetDescription(rmtProperty* property); +RMT_API rmtPropertyValue _rmt_PropertyGetValue(rmtProperty* property); + + +#if RMT_USE_METAL +#ifdef __OBJC__ +RMT_API void _rmt_BindMetal(id command_buffer); +RMT_API void _rmt_UnbindMetal(); +#endif +#endif + + +#ifdef __cplusplus +} +#endif + +#endif // RMT_ENABLED + + +#endif diff --git a/remotery/lib/RemoteryMetal.mm b/remotery/lib/RemoteryMetal.mm new file mode 100644 index 0000000..bb69da9 --- /dev/null +++ b/remotery/lib/RemoteryMetal.mm @@ -0,0 +1,59 @@ +// +// Copyright 2014-2018 Celtoys Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+#include
+#include
+#include
+
+#import <Metal/Metal.h>
+
+// Store command buffer in thread-local so that each thread can point to its own
+static void SetCommandBuffer(id command_buffer)
+{
+    NSMutableDictionary* thread_data = [[NSThread currentThread] threadDictionary];
+    thread_data[@"rmtMTLCommandBuffer"] = command_buffer;
+}
+
+static id GetCommandBuffer()
+{
+    NSMutableDictionary* thread_data = [[NSThread currentThread] threadDictionary];
+    return thread_data[@"rmtMTLCommandBuffer"];
+}
+
+extern "C" void _rmt_BindMetal(id command_buffer)
+{
+    SetCommandBuffer(command_buffer);
+}
+
+extern "C" void _rmt_UnbindMetal()
+{
+    SetCommandBuffer(0);
+}
+
+// Needs to be in the same lib for this to work
+extern "C" unsigned long long rmtMetal_usGetTime();
+
+static void SetTimestamp(void* data)
+{
+    *((unsigned long long*)data) = rmtMetal_usGetTime();
+}
+
+extern "C" void rmtMetal_MeasureCommandBuffer(unsigned long long* out_start, unsigned long long* out_end, unsigned int* out_ready)
+{
+    id command_buffer = GetCommandBuffer();
+    [command_buffer addScheduledHandler:^(id <MTLCommandBuffer>){ SetTimestamp(out_start); }];
+    [command_buffer addCompletedHandler:^(id <MTLCommandBuffer>){ SetTimestamp(out_end); *out_ready = 1; }];
+}
diff --git a/remotery/mac/remotery.a b/remotery/mac/remotery.a
new file mode 100644
index 0000000000000000000000000000000000000000..cc46505e59a545f21a6408aeae4a3751e35de52c
GIT binary patch
literal 92008
zcsvF?dOSKjGM*`TCgYieCk~GmkLLIp-cqNJ--@Ivm!)15(oQRQuL8q!W;5!0%91*aL9iZl_*3L9Vl7sj9b7sf|> zM4~5@XFpW>0Zgw$=?X6?ttSm_3pnQpMVW`6m)iJtliY-C?3V3_hIM(3M0C11x=6W zS#K|JpDO8xkuF?06el>G@*({}boGK~JiQ+E371|k;)G3)wjiB#L7Ud|+l&60f%clL z==wd(S1}1R$(}N#?+@gGdeT@Z|A=+V6|BPueJ^iQphtn791e;!R$uF6c0x^`dQuuy zZIiYoabldvUp2~3t{#=*#5m!>_(it4gtF2hb7zQmXlt%3VDDZ?vHCpVp$B!Q@Gj6# zb*Ug3LOKd_;I~?)Dih79bC>p;qiUO*C94lM-8lzrVHy`<=X8wiS`FK06>OkF*g^%c ziSoNI)cq=8|5M)djM{&JI1=J&U?*w2$6jO|WFyT;8kdF7A-``EO^82CGOpC46S{t| z0X92m)`5TF;$WhbGk~;`Vxu^w2{|GeY6gEQuC@9Gl=?gHyGxO|=4+X+X^+*Hs>L{q zGRSUiO~BZGjQ<`S1b@DZ--8$58-!iqYlZB%;#uzi-YM^sfNEa9xp-ud;q@c5u{ zLVUlx8Q{ZI^bfSbcTw&#z(K%av~y^`KNY6mQ$JPnqJB(y1((+h zTc`pw1lmBMhqn)CNrmis5EeQor1w_fD+(mYS|Q}80CJQMS$nCAksYAO7s1fEChYFK zA`M$6BX6?7!RK?skX30`1cz6QIFm|uUxGs`nHcHkp5oz4)uM~ z5tQbPGQJpD>baAUPwL-ey9!W7KKT72@6SPbfLAlPUZpZaCnDT?c0j>9fTZn z-d_XRS|H|l1DH<-w=L51`PXL{%X-eBZlKdHrSN$?=J>Y3DV}K~pqaujM%ormx8|;$ z&T^YEhm4zTy;p|4k?!}~k{}X(gIP*6ygrGYKZ*JNROFL5iB0n>^L@CO_J)^yk7GZx9VOE57$0O`d2>V2YbxvV6GlhME zu+K#poa44O3i}*kUy88&$;`F{?b1g4Rd`r|F;;B@b~UrLP}_p;qp z7d0HTXT@=wLY7AQbi1!A*J)UeZuhKmtYHGCDlmydP5Yyk8^Z^-Zubbt@%5N|TVUICC=GJ7uo@dp0ip*Ce}C(9e|D4 zoXqMsfzAZ_nW;W_Bun){%;F87doTW*KDKKJ;SEbk{5S>C!y_doQkM#_6`vXu9HGCPlc zV*B+*R@Qrv>NvI`R+B^8t5l?oaem-m_DY-!v)j$1{*8=~LucEhVg7 zFD1N^h`z9O`A;4{E~T^||DM!Qjk;8$E{Wz>c;1xXPoT3Y47%*}dcXg~N_S>1@ve1> z&?nTNTmNf|E}Lg`I8<-UrHSUB-_V|IS!9nV##w!@ z!yi>A=?x#l-uTJb$E`+v;`y7Ui zwFhzMzCbV@W+WHLE7%B<`Z>f!2(Z)rng9~j;Z9TDFL*E76{zB>W zs9l2Xu7M4Y`SvcXPrw!l_!H4DK7-u;8Fmx3J#SBHZ`65++vBT{t`O-9FxSfOMttR zVbi*P$98u8mvtxE_;O2ITGxN!y~^58_^v9}{~5-P+2E}QeIcL+VK;iAM`mDrn?5FW z8^^HTgmFzB>(ue>VePDCZJ^nWHK#VvS8QM>TH&K2ISkGZwrn=llg8+=UE%v z2cPLoD=*O%8x}5qun)`AUggz1u5e=+LX zeIx5fdS5qq*!@7kXg73V#%$)pev8yD(0t+;>*&Hb-HP#=bT&bqo3HaG>)fp03qGlJ zAX-y^`;_zt^h_(pd6~(tFsbqfEqd#iNv@bDvzV$nF9+ZW7 zZSQaJ45Kd(o?#sy!ak%i^jz8a7`kqlZKQFO#?VrX10SLed<-2X`)cACin;nmGwh`{ zN!PB2PdsxH>tEKwy3>zJ-7?yhVV&AEosDH+Y|g;E%mw{U^jk=VgY-x@%2R$mL3;6B67W*Z}3g0@5HmO3J z^q@`lqfN}aUw+(-HX++MlDz43p0awyrWr!JR6tgU_tZb0M<0%qE?oan!-%hnPxZVP z`+k^vPC40iG_Pnu{u$(-i4%THTI=TiOV~lJz{yxoB|C8#iNP;ngD;KX9o9S-AjJvr6M6ha{LH!xKW8a) zU8O*k-Z+JBh2Llo-o~J%Hyv$(woLVeFN_VsjeJvq@|tlE|s*ABs_oIQxXgw1K( z#%#qJLf2H5^9ba6dmPp(F%QGqP1%Uw|7aI<+aSWc2-6*0J(`_`^#O_b_F#?V7{ZIN z534m!*n!2E4^8yT#%5=(W8;3=P3XI1^QEJHMC0^)O~)hGin$@t^pZkT>sS60y_w9{ zwF%>eN)N&y58QSP@6Xj-v2TI)Z~dS67Gm`&w7{M@g?gJXE|8r`K3VGLUA5N!HjG7j zZaWkOYzH%J1lSD*vPWBMS$_uV--^8yDIfa-Z5+HNKyo8TkfqD66`-wk^{|u3|Ih+? 
zu4f(7p(p*#tea%&9Szpyf@Dhk)^wcVb3rj4(f$;w3;B!(mx#IRz%r|k>YI*sruC=R zpgjHzeKDlyMzIfZqHmh|WD3TZh&YrN2}k9~;4#*Bdq2e50iQQ+3>x!}290^}lg2|Q z@iCA46ef;&MasOf6=Ssy`%H8g^F~0o@k!R*hH}i{ml^yrgI{K8`r7>nE79fm>&#j+ z?YDy;Hzx(Op29<8M;()XF%U6*b=$~z)Xw*>8u~^I>z!f7mQhu#phU_aW3;c3f(ob zMA)MvOJK`_55*GRNKfsv0ZjB$1Z-LIeJT3(Vbs;pfITIOkA&7!sm(|x zb&v})&3klGe{jC}=y$<&QS~|5CXL`j3i_u5JR=+LGt|q4HbT4bwXQ;%TTdTf_j1qA z@5Wr}oP==lk<&VOI)1y-to?4xPiUUii80y*Th;}eq*b5a58Jlig|>EK?btORW8#G= z%K&{YJ#;hdnP6J|S}oQbb+FU*u;~rhU!dXp3j(~B(D(Pw9ZiJ|qUw%p(EkYPPkxL} zj7P9jvMMkZC}Z7Wdbff`z`*)Z4 z`!L4a#&)em8RAdlHO}YnqsWi z>@S$#ZkRrhTYbH5PUA_o(TIMx7wt)Yd5R~eqJJp%oGo`XJl*nilKPseBC;rvg&F*uzV8qrEuv(E48s=5RtEP27tEo^$@EQau>#8ti+W zj|qB!8dsn zeNph3?;Gph0rI*o__HhHFgHaS_^{Qp0vnIQ5B*=zU+HMyz!?S5@;-bAR1SEY zH~6JUOY~5B<(vUe`)j0C&ln5@(YTWDU-$5NRS9^cwiU)8)Q{E2+dSVamImHD{=M&& z9sFK@!=IWfe!3R%9^8HW@sGwT-}?AD{hXx9S1n0$FU+mRnw(jh+J593R{xd?26&`HU!rvcN!e3ZQI#|5fgTxO7r|QBtDqIMsr&b!Blvzx zdAI^?PV{-8Q}3?YKDvzdaY5fw806$H+S6vjdf5Qds`YaqKebIkFW(p43_7i0>MHcX z#QG0G4+Pugd=3h_!nMtGw9U_MT;;nG`mOM11k-;M`GwN|{EczGu45*?@6Ltxn*m-n z9m8H?MIXF_bp#cE_>89GJ?w#4fW8;J_rQw2P5dPpsnE#n!F5D9*9jHSPc)a#AYCZ4 zemWOmT#V-`_#CW|xo7F@hv+-O=TC@76Kx>YBeh1C0}pfs(NqCB5bZO*muzG~|JDn2 z681`J;7`2N7wO+S|Fj~j%Gr_Gqbai0_fG>7)@;W24^#f+zv6pRux8F}r(a6FI2%Lu zWUDS;*#CBnp$zj0J$xkOE5qDvKkbF5Clx-9R;=OB7}QEO&}>#e0Dc;=Ux~(Z&*c1W zDu?vC2YQ))-whk>wx3JAtr$P#-7I-J*1?3IPwY`1EWr36<{bu_Tl^0Cs!RKgOYDV~ zpG#;rPZ{cqHNuI$(l56ZU>|9I_sD{OtD!dds&HC|YYNUm7?|%up{4_S1UkG+neS7C zt1=}^sjvLWh&ALf?Rh!tf$xiP5WIjw9@OgdIoNgWqBu52Ue0U@&={R@1L z(39R9Aaev4GG8&Ei++DCuDcDei~rVk&qltV&1625o!UH~`I_f3rJul_-VHkw=dOtF zpWzIH+TL9s`*RS+Z6N3eTEpV=;WO|@Q`{GXO%%ZMc7rbY?s2L2-vK|v9Oz`Gb>|CH z3(YjoAv}d|$9OpiJGB*izy|<_pU3$T_`4UN40SF&g#EE!G+O)p7ZdAW#NH#avD7u< z42<7J7yV*=_COh><_HD8yO_0xK6E_{+-ti*hEA*=Ub?4xyEDnh=o|ds1p8-P-4+sns<;zbyfBe;m(x*f$?xzV*P^N3DKI&(~woJ{#dz-`|gU_53NU ze>vKf)?-bDSa*l45uY;rSd;ys#Mfjq{6|`;4rG@Qy{atE0=-mEYTMCKf6j6AfmY>x z5Wl_8=jV9~HF?D2_b0QB!%M9`4S2K}*|BSZ3{U0Rw;ly4Iua;%5gv}7Avwq3uA}(|TjYC8urJ=b|=Nx9EeRZT$sa@X( ze}-{JsUU7o**=6(JhlBYFusz1sfEHpH_k(FJ9H+)`WVrGeB}$_*BoZUzx@UB#`*8m zFTmq3HR!wk#PdDSX+t>cK>JJPE9DuBSbgVA*mXOM(`cX8QM3!?A1QBI&yc0rYmG+} z>x;+ywjZO6p~F&+J%ycr`hwr)ebb(6%z-^fzt7+|?B!vcVe)<=!dvLK2K%}Zo{rzN zU(cnr?i^WLmUp(RwG8u#YT5^9l8Wvfo(B0YEX(V{dQP{-^Uzt?@~^;mL%yIR>!kYO zL}qK=m{d>eGzT<>_5;s-EBDAFQhhpwuS=>QnyQ4KMEF6e-hub#;-q?$8h#q#Cs5}E zgs)DjuK?uv3?uySkbgYF^N~Lw4?nNuPvI8ikF`-AF4=RBv>|`IH|HULH9Q&Npz|o! zW14f2KOm2vj__Y1f9QhdEaVT!!?O?$I@2k95%R}8g@@-O{5bN*d$Sq&tKl0Fj`|*j zZPt7Z@(1Mc9SA>-{IQ>;IUV@}^6+|uqrT}BJ{S4Je&^vW2>%B1$9wZ^d{dXq(HKTyD;MZU{_d|VjI^xnf8`vQc{TdXP zbc801UxRSG|G=)6(Z?zOF7DUBnL+ep$h(iqroM^!t&eC_%M|g1o_Y`QF4qsIqaO~! 
z57ecR*BYNptp6co>o@Q_+_OXK^<%E+4VtTtM479e!CZA1w#o$EgSjf#m&#n#8#GrP zLR_pvx4WS;X|C!{5pz|FLvvLx%0C1CkZug-{SNSEGUif-^4 zI-%dJ)fw~{W6+ix7wQJnTu;h#yxNS+0_+2dMk8Az_44wi*#`1O5RF8Kj?HS9rCDn; zl3AX9w_Kh%onTUV=4`ARVecw@nR!(AGoYQub($*^-~WA@(6jg*un9tK28_{VzguGM z|FW9(Kd(uvK4y_@d~LZ(>dnA-s>fKG0h^NSO}ivlcdn3ZwC+YWC5@T1hDJ{w#?DNP zkv(6RvS%XCOz_E$F$d?;ZFbm|wCC84@rprj*= zzfES?*RMNgHZt2FWGpj*{9l9cf63hch4n4r|4L@b(PYe8Bk27D=#^p1#6qXgDHCaA ziH!Fd*pkU#R$KdVzswco2iec2dh`6I#GY3qdG$Zc`u)3D|M#IQzbt`nE}WK^f%Ao~ zcKBT3yK2MurNg*IYZlkhxg5yO0nCN%pV7Lkf%`F=v6tT42A?i)M=+n)K)%mHzZxX{ zE6tc!eTX?+b=HCJLoPec89me1A>ZR3mQCX!$s+E$IPZWD;HEg1T9RyNC%Hwxey#%J zD(Ra6)H4nA-EKge?#<7w(u=iXv}e1{8Mu|F)2j_6~hWQ9Dz&s%z`zmzMt!B&*xvi^=H&)D7t=LCW1X}VS z8=?a| z_-$6y-;43_jCM{Y`RURtg?&Qxmcgqc?Db{PH)3z1m}6k=$JYhFjH*XS@6S&xFjHD} zPD=BB>IWDrxEvRwZK;jb@YQ&Sti4Hca-gazH@<#cPV7=#J;_P07W)>UH)gsr`iM6& z`ZMW?6pLrIU~j#vRgPGNG4#r5HD_r{X%z3ko}@_2pUE-k?ogg zGBdDA@IpozpMj1_wDsOhsTbpPKk-1FuTfjRJ$aljp?te1j-B}8Vc1!)3v)putpm%7 zomK1HRDQ6Wj%JR_>0hI~p=Dvp>#>@P6nP!}sWqfI zD5Q>%3$-u)HOd^gVPcuY)0a_(TE`LmrZbtZVs5t*Hen5HQXR%h`h9W+?DuKbUs9h8 zv@`HrKU_h-G~tdDl5Z%;RM_ns$p)>2t+W!hF#I95mAC_nd<3JAf#9-r=~rI1^=qQ+ z&r$YDlZgpJ0 z0^W4u&KQ!HYUCfWJ*JM29bNDf2FH^z_rqQpZo5$1jn4lc(qE~9&WQR8b4s;u>;4_S z%qz6Dhp^Y5_!3r6zv(~Gg!?T*+rtfczfxH;Vb3y^o3FG-g%)>@6?LtU;lq0hzE#fq zCrydC)9PE7?KiL~uauSn*px)e*A-g8i_zeII*K(K>Px(z!asJU57V7g}G% z?*D7l)3aw{J&mDdDg7`|))kBirshk^8ZART?;&4g7#rEF=)Ehlk#{LPm1Y@kQsjrm zlURI_WcR;MYl>(G8jI4+Y$G$X4%PmzfZwn46!!OGU)~_%kst2`#M?`Er(v#Qy4RmW za|H5VybSxC`ZxXlH~gk`6Zj#|d0?AiUYLCleu8z_1CxN?X85$oUjSbL`3$n?Zp20+OT&*kGx8d*GlYz!#N(Cp3c+YoW5~PTVFx%!Fa3- z>9N-*6Z|v!fX6%KVb+oVXFMP553^O_{1T4`KadIUJYK@YcooodJyV%?hA>_S z`02XAEwKPWaDxy_NQdeI)oB(*7hjf%XDFXq}O_LrDA6I%5_3 zO(x|B9+BMecp>dS1UU)h2i}p~@OUBZ-w8R%RPzHrNp5(&koK>D{s_bapGj_bJn%ZW zz9y0!+D^{i6=x( zNNy+&r&p1a3djzv|B#F@RgQ>GtOXfCCzl)07?2yx`_3&~)qOGs*->@Jp3BJ2t}wDQ z5F$I)5ZM_BksWIU+3|$P4#PWdn-JN7pLK%l3`CF}hVWq7@kEfFfe_iTMvxsEdrDLp zBmU!^(-$H;))3hlh#)(|kdv>0pTRnY)~PAaVA&zP6D&K{FtS5>hsRNLj46!lklx|+ z|0VJRe}y9@cD;iI$H2Jb#{HEvCaL32TNv5#z-Jw-cNo0~jVD;sn4ovCW+HeVqIXE< z&SIYt*C)_BBy-_phmkDeeiI%KdWU3=#|zOr1CX&E?9buxpm#{-c)Sq3BR9_Y^`b37jUbTD5Af@SRg0eyJqLt$i$bQb5IqMu0Ss9eq~ML&_w;yh7gjAV}D@cJwIiF6jPry^tOSjX$6 z=%~P0mkyl;+PTgG-2t67ope^={~DduqqZ01?s7USAWx9H%jzsuKFLytmV?7jeV6r7m*e^M(y^8O#O2SqH}GY~z>_=a6Rd^?UL&l7EVPi|~bA zvEK{hD);lz-Z7f%2j`!GJ(&aW352fQ>Xr3v)u(bL>)Qj$UK7<%MeC(lE6r5*&2qmg z%A+|vtwpMJSIf>!h_Hr`9Nk|v1YaWg9o4c!>$&`f@$v#b)hj5kLil31PnCQrq4JmU zq5s5Y#czhaef1-_i-GpW>1a-t%y!^hKpy$oqUDVe}>VYu>Y9Lya~Pyl6!R@ z0GDg@8yWpZ{%_pvgE~;VRbhQQcy0TXBCl7nwoN=!+l|(@$%jn53`a}Xu>VAo!cR&^ z@-2KpbKpywb6H;!Bjv3g`*M&11GXedY8P#WYU8H-_=&qxUsGq(Yd(>%PUpn^B zWZ;en(kZi`Q>YAno*4T;XJWq$r7eV??bmRxI7Eb+V==fkdxwj;yX zFAG1&k$=T_j`tDx#?0>O+?B6k?erfn+V(TgLnGK{LicR)ebwpMr;9eH{kv*DoljjH z-^Yvnx?%V6V&8AreY~^}8RduD$J>cIs{4c0vPyq=QQgnm0e|FaqJ#GH=3}pTN)Ufq zLij`X_^5eg9=+I0cO3Kd;Eff$G1LBW`0UrhPikC;J<6EF(B5t}jpxWk*#3rg8GF2q zw1*sX8g-9150|ii{UGX&IgGl;n};VOoc54o&Z6$|=HclGr#KZ=q?_965)#`$_5WVt>jPb zrR?$M;ll|3{Y2T|;pY)Pf<3Q@FW$eEtG1Erp^MqH$Bks3eAU!`=d|%XE$98V1E1o& z2>OmOsP7D-@3>C3ejjU4uaKWf&Bt`;qHV3xcbbFx&ZB65FVaYoVeJPv!$bR*G0yNa zi-n3FRr6pET+H!)9p`bN=V?!$7w3Dl@O2NNe-0xLb??x?fr~k6n3~rQ;pZZ~GSrSd zU_%s<-x>XX1h#}3HU!S|betj^!j}jeLerBE`K9rWbomf$BXyh&{_V%vm?7weVdMCI z*+STzRE|n>MJv&QvoJ&>;=K<#3E%nmB0MyI+-r@;RBm1tL@@qg+G zJ*$|?#M%L!6Hw1ode9cZcPF@ThR_WD&{@4Etj7>-!SM%Rb5Sd4Ln_}Y za896eD$Z)sx0_^~8wcJCJdO2qcDn_1n&{jU;tj!WgPcu+?cDD|ybSDdE~a|u@XaTb zxd6Oy?p@(3eElGRSHM!o4$beyN=H9b)|kXPEXNOg+O?MEET% zkK)NgKWTU#c9B=xnkMT8zUeix{?^HW%h}FhtQAvwjk0z{*z63x^W5$J`T#@9?D;T_PsW4qQtc1j^bYjGa7q#H8D&zGxvw>`)k 
zelA?OB+k9iJ%j3RI+v>c?o@afj&DOazeV*@pU^YUs+ZcU^66X=^B>bcM7tCr=4qvy}yO}KEPi=J@zFwvbo)Q3M)`moC9 zGfTzZ-T|&hF1;VA5#L8~2h9;_K7jst+5PpKaE8Nyd7cbj(^@9y4faq|Uq>JEnb6l= z$~>t8@n&F+oyw3g4{>3B5IiTl(y|6HCkrksHMA@#RqPE%+fZ3JUmS67b+md_U_M4= zq28&~M?$}aK(a*j6?4+mOjQrEeb&K;Dfja4sBv=ioBv8!SX03!z&L@5*EwPXTAlx7M4p zj-=_)SByTXzBsEb?U+Wlwgq?np2EF8W@+{kGw!?~+)Sb}5^;BW9Lv+;ERLi(rFK`%+I5_f!A}tL706th###y$UrZ~sUEk| z2T?ZU!&7z^XS(&p<~}ZuN?%lM1d0RiZ(06|hvQw42H4ei^!_T6R;;z*lxNEf!cdb6g@4x-+<8&6?d$Hmz z*hVMRZwKU0#(fBAFJ2FvVV8IRmfw@h&z0kDs5baS)Uk5_G8TGo+?QMawF_xeaW7u> zcMF!a(|1A+zhG)VqM3270Aoxicr>Ucc%KncYTQ}U#L^SF^8`Y~D^~$(rFm1YI zeTeK{x;q0reX|8^K>JG6_oH~9ue?*Co0{Im*s_x-pVC8i-t=guLw4?jJp5mfi*MrYmT%yj0gwr9AJ9CJomRgi zj&XzA(U`yah874Oksd}I9^Z`MfWC}w|jAyqR@*dkC*>2@^c{%tdR_2Eb^Vj zebcF9X9sD$gM1e>U)Dii(LIbApfyrH#6R^3XM@pwnCkdP-$A6iC;wT)rnzuFm+m<_ zqd5Zmp8VdaJs;zK#dolmavAQPycP1|suKIZD$$=^xFfs>VKVj-xT;*ESo42H#vQ}% zOghi5YjFSc_s?&X}s#=3F7A7fU3H}0F)<+1+GY1luX?6Gvt0G#fzbmLA*T^93oqi#Ak z;^F>T-B#=qrf+zZeRyZIFEqt{^!QtzW5#Jz;?pxp-1W+xEmR>U1f z`9oUqP0aT(S9%$I9Qe|oa{|1~0WXO@$YYrcX@dDd-yu_FH+1}m9n9?(_%OM?BYDe! ztdJb)F>aoNe$C`EC!b4)yuhc%?-!*pPDlPBk{Q^(BzvmwMcnB*aeO`knjGkta`2o8 z^F^8yd4R*y>w<5-6+ED^5q3j1*<~4cr?Jd~zCqzFfI)fid&6k#QsrL#zTXh&3JoLq z*1^B`mZJMrc_tXjmm$zY@_Z7I#sTzA{=Ee7rJwG6{r9N_qp%(5?jhVWBj&A;wMVHf zD11BGqZ!{Dc<+9OF=Zkz)N+IEM=FC^afcQ7b6A7Ei+m}bqkwdRxWBTw0N*QHgL|%3 z9DWCIYk(^W!r^xSR|4GnARK-NaO;693&P=d09OXwZ9zEv4&ZJB?wdh4{0`v0N%A*t zCx_D)RA0;?d}eBQv}qCUr_ cv+b3n614;(T@S*oT8hBm>t|x_a6lbm`r(&69Db zC(`3mufR7?W!%-)3OUBz4udlCS8crSQhy5i_9@BuK(ue4B5!$IX2m^qkk{b;rRp2C zT?q!aEyf)#L$oiPWVj*?dH~}_FpnsWU#o#FqQPBiTG)7T7<(pR48k`Zc1^(@u#$2g z6X-#|q`O5;@DT>zM@+g**lwf;#C&%MV_vwsC+V!**shzA{~DCD8s)7*xrG?(3UJq{ zCZI3g#vZr9g6wwK@IJbC=quQfyI@ED3+%|pxgGhRk6#5laujQCU*h{Czr(lI{YJKP z2z(p@t=a%El#YQ>(<+QUm<2@gD!lj=|yAie&i&(WWWZo4+M`Ws6Rr-``q%^6V+-N7^jpO<?$h-ejv5l`<(IN?QJBN+Q|w*Mi0bye;{?9<1Z zSPAV#Cp^|mlF#B!6Rc65ABO!)Iu(9moHd`l);pE;zoVHA{)=(+{ZFx;!u1#Kdj+(@ zKidju0{jkO1$6R0Ko8&^K%$kNaD5QIS?(iI=YSLs?F`>E{3Ib`peo}oe3ypeZO0jd z?FBjQ#D~uz6L&)KNE|-aS9-j%BAlttYwtur``nfZ({XNj3U78fd_D|{2D(!*( z8qxMYigZ+-;P*S6-&jYlCfolD)%M?rwSCzB#DnntGD3KS_{7uj`LM$upf4X`nAZXx z1&>zZK30t1whHC$Qj-FYLAIJha7@9`{j46;s~C5y7W4fn_yz!A3GPR&#vQ94YNbrt zKay~jH5X?-PG>Cjlszx8`hEBwEJMGLp*#KvJ1)WR$9=tW^#EjMs>_x8o|I7S!0&Zf zr=s|qlZ)v4u^lH7hBRd!&`mx8vIR`YliQ_xCu7ZH26Vto=z>|$HM6niF?oWnp*4;( ziKENOUQ+$1@=Z^a72Vs{(51bqPt}i18nAR~A761Blj^B`Xn#N^>Opf*FYNAa+Uvc@ zYA(TALp7eWH!ySNqWqwD3GW3#?;5-p2EA+XzAETF4)3dj-Y4OGP0;&fbLKTS^LI1K zn<+^hXTLVZ+*v4{9)=x7a`m2OW_2dkuUj=U*OF}p**^{&>p0CttHEp7#_-pB-ZY>; zeTX#2Us&G$V|+WePBQCXPycn=E!*#Z=#aav?{z6D^8xn_eYEyzhOY{DrJW1I9=L1`}C+|_kw%Ow3f3FkE=&AcQ3e`zf=8PKW1h(^j)|$1qpg>Y}Yp+ z19w0cZpV0V8|E;R1LFYkDjs}5U+vJz<9l__L04d1F!dnzuwU=4$&Gi_B#{RZfr&}N5aj+)`-_gW(bP{@= z-^k8y0$oSe;f@r@QzF`ebV?%fIzV?8>9f)flIt(G{9=Ni zL~pX1<8`em#kl(?;ktbDkEKlg4bs$1U6Qqbs+6F2Ng4V;N$yoYeS!EjLC zm!i=hN~vKQ{WfWV{t#rVqh?XG0`PyRz=Ii?M&4R`$oX%<~&aioFd&=TeYkvXL8VcBCJhGvXAzas~%mvS^GCzsW%RY&+pG3J&vJ}HVvB~;u zeMd?M>d~l4&U{Wv)&G$#(l62^WIl&_9ApW|*P!H`Z1@rK%DT?l{|BV~153su8$JO2 z|A;alW>Yx*C%}iFf%eys&pXKHQSk4Nkk!6~!oI_r8HU4}S$JeTGY#LZJ)F|0nVy;R z?i%y2kms-1bUZSi8HTsy@1%TJe>mm0Y=(Xx+U0xfTKz2T?9A_J;`E>5DxPd@YUZzM zzLO&AG*hY5WW!US8-CQ1>=i`kEYO$=KI==&lBQ(-?zWT>$w!j5Gg{ti=^j%%%&P%mEsMpkV6@Lpa$*y0Q%q!P5)4K z!+##fcfWP8cl14HvH#DK<|a4%PuCcfmEmT&bPlfKor2RtanciHwzhLRe{9L+H^ zG@255cWX*lI_s}TnMtTqk|r5XqCrMG9fwYM9rjT=_E0YJXga982Ood5tnU5iCX`3> z2FkB1-`@8e%F58lCC|0oiE{7k%c3%*WKqs))<@+ONV-hWqc4pgQ**IVaawO(Qa7Sp~?ESGyMGISQd&pwIu2Y=V{%PnFJE1a!&5@#gs zaqKzxiET9ZnhD?P2<~dlkR&~QM?#Og()L$X_3ekvYL}$yA?RD!4#_A-ui{E@R~qSG 
zH|~qUKA(+l`2LNs#c;Q*t#umiGnZID_Q&{K!0V8f@ZNike^K4)YsYOQ!Wf@(-9WPop`vx?XrU=6sLL zXB|84XJ(@3L;>r-8Ta!Zj1@=s*!yq>H2WClAGFr>OU%j1*Bj2BQ_Iuf{&Dh4g~qWm zBh~}Q{a&Fy0-0gX3qPG|Khqu5t=wP1{YZSB9b-S=qol4Uct97e4R6AF2>Ea*f7MsN z7ruk{VKbB^E640dD^6H^Ip;w8q?*?^FGkk&HFum?~|SL zCgJ;wOfEEb0($?ze(JjLhfg%9kM|GKPm7j5{7J@FfA~a;_jJVBhlk?I?Apb+gD>_aN~yk)D^CIbNm?z7AfuD=4!9vQA}A zh0F{=KLzX6kp(Yt*^U%WGN#f~e^enfp^AjC(Z=IvNQx?jX$7_%Hz zw5LS8dvNc%18WTGJHvYy?qJZ&Wz#5(`e!J;C@Yo9qbJy}XANER#~DWKQQ-HexzX?R zoQGldD4R z3OyPwFCk+S^QUyM6ZrQtu$JjFe#}PsnvADq1@r7#Vaz4ptmZhI<-zqb!bcMN4UM38 zWiOrB%ZL4Yhp>j#Q7~WlJfp=UTB5}%xNk4_pGJ$f=|F7pVkz^5r7ba)xhFfec$*H! zM%$hkdiV~h7fjPVM# z$Clsxn%LsS!n;`F?0M>MTi*P8Vk_^=-Lb`srH=m_<7lSFCiey2*mTbekHi))7GA~L zzhmvYv6LB0oEI`<<5?_uoymzUulYHDJvr5s#@0r$j;k+hjI9o5%(2xW)_Ae-_pcRa z&&n8Sx5vhdSm>I+FSd5ld>UK4|1ZYzO;T)p+p|2jcro#9S!{Xjc_KExZ8{WNyfZnm z#XFWCTfA86daOCNx@vBUt;|i9*y0`gdTjCb#29bWj@ar^5M#Vpc=y+~y9-RQ@nlnc zY;}F%so3({^RKbR(`<ANY&>p2WPEI5Q?7 z#p8cT3N$G&keM2vF^b!;wdwW-caw9CyT-}NoLlRfoDDm#uVi;^Go@Qh>u)VBW$Wuq z&W47%2Gdq&W20k>lhUjcKR0f7Zg<)@IGxq&ojcS(R!^aUNYuQ zifbF29JQ6mM=5S%)K$*LriQwmA#u0ZRpLZen@tvrscfCiZY#TWH5Nch_Bw&L-0eQ--^?$=v9^*I9R0Mvc4HY2UQ{uDhHKW>&ksriL}uG~VoNVlGGR z<{DlJTWPtyc>QhprNyi4MfvMjm97r($zE88x-@LBY^rM@t^_JlR#(?#+R#w9&Dqej zld&6AiWb$>ZCS(?vFjT)xog=y3>7FSz2)XLAr;_kZ*(@??Q95SDToZb-s9Y4Z>+1V zMvc^VYO3588g#pJQ#p@hFWX*Q&NIX(lQ&!vR_LhxT9c`gH)7@_8JP?Nf(lR3sO)S32EyJ2zi{ z{q<~|Hyaw9+Z+wf0xkwj06Xs$th{{Ph6y3JE?vR|BkkqR+Rf_k;>N-nH!6YxiCxak zLD}TYrF=mQ3(vh z4$Y4%E};v8n1DLCYqvK#Nii*EO)m7YagxyV4NYsd*H(_J6r3nPjCBs^2UX*-+a1-; z?c0<_zjYfV038IRZQtq)R>Z1QC^0q^ufI7+s`4uX|EGOmtmoV8n;T&A0dNBISXt5&bMxv02gZRxu8w`|y0R&KlXw%hOc#y1_C zDmOdt+TwCoRo85-t=m@L(Ac#7?t6CZymxWdlBL->%a-TnExcizQj8hAzq8^kwRH{D ziAW?#A-HR|p=WJ!Z`lIX(*$LY5NM%VM~$i@Op3zI$XdK4+q7wCle5trXb~m@ zXia`;`D$>scC(OS>Uv5O(Ew58Xmr^en`)eP41yRNShaH}$EdhYAR>DWmY# zRrzLS%2>5@RaTbz17~ne8Mkb6*4|u7zt%aM95wXgw(T|5j#@K=S}bK7>KdESWh!9{ zs6BSP#0gq+&9ZE^q0CmqHk7a5aEbk(h;pzA0bYrYU@xgdPZ)o_%~9jtoFTfA8OL3y zv#{~j7qY_yITN}sH&se8+$)Jd_+1!*8tfSD*E_a4scBd(egs%E&N(~RC8R2#)V4Q} z=G@?*{{wM_I$d;=fYjHy{uN0`ZHvrqsV%Ij6S{!o)nsARuc%sp$_;hs|Kr-6UK<0t zUKB&kdkJs3)((=uV0{7pY~AV(m~9P2_PWL`uyfYeHI;#}FkSrkBMWj#PVVabHTgM< zR_A49FUnrNDt}R4PGSC{!kqkq#j6T)a|*JuSdq=P;fAd1vrJiwvrOfDgr-i1N;K6~ z*3~p7=x^P&rNOb;d7}yTi!cNd^k{y?#JJI<;1l%a&dNoB5qgo(T{oIwvo2V@rS0k0 zKDzks&mX(nwj}A};Wu9-`qaO8@dL|#6aSQOW<@aU-hef%#wt~QtcrlAg(~{%6X25k zixk8A!wC3nCB+>Ae@XcV6#RV=(#u+bkR#xeqTpvK_(=S@TEQ>YF=hPzVF!(ui4ZN?LhzgOYVJ^e+h-(w0s5`8~X@J}EQka|h^R%O1k3iO4;@0-SpXHVe5 zkZ|=g%@+7y0EDCeT?M}w$Z++Kr?0Z(kK`j<`j-^^EPNa{T>5uSl$x<35FZYI_0q*>9;BPNc=qz1>dFMBk_MIO8MR>_>ZIDN2A~w4ijIlePjh6N&ZX< zp7dO}{%6Y&`A14$qu}w)>#*r_aS(#j7oq=IafvnkHQ@=@|9Ta?EdpM?j#4xB44$y* z6?|s|`fS(pLNbNpQ*e=lVTyfV&Qf~OCb(BBgg%Kvz&z<&S`j{k3G z3%nwKapGev*RUfN0_PQ=#DRL;*PCdP)Aj*edeh79st0wF2K4fqx%R zLi{B~sQ;(+0{?CV{2Pq|?*cwt`{ZsH_^W}(zf0)9^KOCf1qg>vx<}xD3lNU~UIp)s zfPZ6$NWTG@W2n2Nd=oy(L4VZ0Qo`VE_X)frrkAAmDELVFw^hNvifqEw&$e6S|13Z_ z{>9xd@bO3-uKr2Q0zWGPKEosM)PKXJe?q|%|H9F?uSKN)6d)XZJN5`Xb%Su}pHc94 zg1_PH1Nqw`J=uBT^55_sfqz2)6~v zOyE}taEbhGQ1A-k68_i}d`5)y_bK?RBj7uuq#ule|2RtisoxU?N1|^<6#V8W_ybY! zy;1P*M#0B@U(g?ke;HBmr3yY${hOkse=JJ+fhc%y6nxU-mv5ikDENvf_|_=+XBB)T z{=OXrKN=-}(-W6(zg1E2E(IT{{SHOJ_eIJ7-6(kW50|I^>L~c4DEOKv_(KXlQvLg) z;D@5%J~S!5@x-?^E!RlAz>`Zq*Lzat91GYWn% z3jX6L_*p-?eEqCZ@Gb=(ss0C|r0Vy~?#av3e_a&3Eeig=DEJ>m!M_>>|7jHb zRZm^MeygJ3YZQE>_Bj+KeP0y(P?Y@R{_*nkU#H+@5lth&`2X2a@GGL=S4F{>M#0;n z;O~rruK?aHu|;<%aDxJi6lhgot^yY;FhhYRz(k3aUL~Mq7T|v{R+=iHB?<6G;Nu0f zFu=Je?*iIoGQvj%w0sIk={^?F@*&_=z