summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rwxr-xr-xcompile6
-rw-r--r--compile.bat8
-rw-r--r--src/ac.c257
-rw-r--r--src/bitbuf.h58
-rw-r--r--src/build/cmeta.c15
-rw-r--r--src/build/codegen.c13
-rw-r--r--src/chunklets/README27
-rw-r--r--src/chunklets/README-fastspin109
-rw-r--r--src/chunklets/README-msg55
-rw-r--r--src/chunklets/cacheline.h45
-rw-r--r--src/chunklets/fastspin.c299
-rw-r--r--src/chunklets/fastspin.h65
-rw-r--r--src/chunklets/msg.c275
-rw-r--r--src/chunklets/msg.h350
-rw-r--r--src/crypto.c24
-rw-r--r--src/crypto.h14
-rw-r--r--src/democustom.c113
-rw-r--r--src/democustom.h7
-rw-r--r--src/demorec.c20
-rw-r--r--src/demorec.h25
-rw-r--r--src/engineapi.c2
-rw-r--r--src/engineapi.h17
-rw-r--r--src/os-win32.h4
-rw-r--r--src/rinput.c2
-rw-r--r--src/sst.c109
-rw-r--r--src/sst.h7
-rw-r--r--test/bitbuf.test.c22
-rw-r--r--tools/genkeypair.c42
28 files changed, 1767 insertions, 223 deletions
diff --git a/compile b/compile
index bf01493..a94d8b1 100755
--- a/compile
+++ b/compile
@@ -50,11 +50,13 @@ ld() {
src="\
ac.c
- bind.c
- crypto.c
alias.c
autojump.c
+ bind.c
+ chunklets/fastspin.c
+ chunklets/msg.c
con_.c
+ crypto.c
democustom.c
demorec.c
engineapi.c
diff --git a/compile.bat b/compile.bat
index 75ac2fe..9d8a2a3 100644
--- a/compile.bat
+++ b/compile.bat
@@ -58,11 +58,13 @@ setlocal EnableDelayedExpansion
for /f "tokens=2" %%f in ('findstr /B /C:":+ " "%~nx0"') do set src=!src! src/%%f
setlocal DisableDelayedExpansion
:+ ac.c
-:+ bind.c
-:+ crypto.c
:+ alias.c
:+ autojump.c
+:+ bind.c
:+ con_.c
+:+ chunklets/fastspin.c
+:+ chunklets/msg.c
+:+ crypto.c
:+ democustom.c
:+ demorec.c
:+ engineapi.c
@@ -110,7 +112,7 @@ if "%dbg%"=="1" (
)
%CC% -shared -flto %ldflags% -Wl,/IMPLIB:.build/sst.lib,/Brepro,/nodefaultlib ^
-L.build %clibs% -lkernel32 -luser32 -ladvapi32 -lshlwapi -ld3d9 -ldsound ^
--ltier0 -lvstdlib -o sst.dll%objs% .build/dll.res || exit /b
+-ltier0 -lvstdlib -lntdll -o sst.dll%objs% .build/dll.res || exit /b
:: get rid of another useless file (can we just not create this???)
del .build\sst.lib
diff --git a/src/ac.c b/src/ac.c
index 605a2be..263e114 100644
--- a/src/ac.c
+++ b/src/ac.c
@@ -20,14 +20,21 @@
#include <immintrin.h>
#endif
+#include "alias.h"
#include "bind.h"
+#include "chunklets/fastspin.h"
+#include "chunklets/msg.h"
#include "con_.h"
+#include "crypto.h"
+#include "democustom.h"
+#include "demorec.h"
#include "hook.h"
#include "engineapi.h"
#include "errmsg.h"
#include "event.h"
#include "feature.h"
#include "gamedata.h"
+#include "gametype.h"
#include "intdefs.h"
#include "mem.h"
#include "os.h"
@@ -37,33 +44,91 @@
#include "x86.h"
#include "x86util.h"
+#ifdef _WIN32
+#include <werapi.h> // must be after Windows.h (via os.h)
+#endif
+
FEATURE()
REQUIRE(bind)
REQUIRE(democustom)
REQUIRE_GAMEDATA(vtidx_GetDesktopResolution)
REQUIRE_GAMEDATA(vtidx_DispatchAllStoredGameMessages)
+REQUIRE_GLOBAL(pluginhandler)
+
+static bool enabled = false;
-static bool lockdown = false;
+// mild overkill: 1 page of memory that won't be coredumped or swapped to disk
+static struct keybox {
+ union { uchar prv[32], shr[32]; };
+ uchar tmp[32], pub[32], lbpub[32]; // NOTE: these 3 must be kept contiguous!
+ union { u64 nonce; uchar nonce_bytes[8]; };
+ crypto_rng_ctx rng; // NOTE: keep this at the end, for wipesessionkeys()
+} *keybox;
+
+enum {
+ LBPK_L4D
+};
+const uchar lbpubkeys[1][32] = {
+ // L4D series (PLACEHOLDERS for now; this whole thing is unfinished!)
+ 0x4A, 0xF3, 0xE2, 0xFC, 0x9C, 0x4E, 0xCB, 0xF9,
+ 0xBD, 0xB8, 0xA9, 0xFC, 0x0E, 0xF7, 0x93, 0x9C,
+ 0xC3, 0x09, 0x43, 0xB2, 0x6E, 0x7B, 0x1F, 0x19,
+ 0x40, 0x05, 0xE9, 0x60, 0x43, 0xE8, 0xE2, 0x03
+};
+
+static void newsessionkeys(void) {
+ crypto_rng_read(&keybox->rng, keybox->prv, sizeof(keybox->prv));
+ crypto_x25519_public_key(keybox->pub, keybox->prv);
+ crypto_x25519(keybox->tmp, keybox->prv, keybox->lbpub);
+ // dumbest, safest possible key derivation, because I'm not a cryptographer.
+ // future versions of the custom demo protocol COULD get something faster
+ // (like something with hchacha20, if only I could find enough info on that)
+ crypto_blake2b(keybox->shr, sizeof(keybox->tmp), keybox->tmp, 96);
+ crypto_wipe(keybox->tmp, sizeof(keybox->tmp));
+ keybox->nonce = 0;
+}
+
+static void wipesessionkeys(void) {
+ crypto_wipe(keybox->prv, offsetof(struct keybox, rng));
+}
+
+HANDLE_EVENT(DemoRecordStarting, void) { if (enabled) newsessionkeys(); }
+HANDLE_EVENT(DemoRecordStopped, int ndemos) { if (enabled) wipesessionkeys(); }
#ifdef _WIN32
static void *gamewin, *inhookwin, *inhookthr;
static ulong inhooktid;
-// UINT_PTR is a **stupid** typedef, but whatever.
-static ssize __stdcall kproc(int code, UINT_PTR wp, ssize lp) {
+static ssize __stdcall kproc(int code, usize wp, ssize lp) {
KBDLLHOOKSTRUCT *data = (KBDLLHOOKSTRUCT *)lp;
- if (lockdown && data->flags & LLKHF_INJECTED &&
+ if (enabled && data->flags & LLKHF_INJECTED &&
GetForegroundWindow() == gamewin) {
- return 1;
+ // maybe this input is reasonable, but log it for closer inspection
+ // TODO(rta): figure out what to do with this stuff
+ // something like the following, but with a proper abstraction...
+ //uchar buf[28 + 16], *p = buf;
+ //msg_putasz4(p, 2); p += 1;
+ // msg_putssz5(p, 8); memcpy(p + 1, "FakeKey", 7); p += 8;
+ // msg_putmsz4(p, 2); p += 1;
+ // msg_putssz5(p, 3); memcpy(p + 1, "vk", 2); p += 3;
+ // p += msg_putu32(p, data->vkCode);
+ // msg_putssz5(p, 3); memcpy(p + 1, "scan", 4); p += 5;
+ // p += msg_putu32(p, data->scanCode);
+ //++keybox->nonce;
+ //// append mac at end of message
+ //crypto_aead_lock_djb(buf, p, keybox->shr, keybox->nonce_bytes, 0, 0,
+ // buf, p - buf);
+ //democustom_write(buf, p - buf + 16);
}
return CallNextHookEx(0, code, wp, lp);
}
-static ssize __stdcall mproc(int code, UINT_PTR wp, ssize lp) {
+static ssize __stdcall mproc(int code, usize wp, ssize lp) {
MSLLHOOKSTRUCT *data = (MSLLHOOKSTRUCT *)lp;
- if (lockdown && data->flags & LLMHF_INJECTED &&
+ if (enabled && data->flags & LLMHF_INJECTED &&
GetForegroundWindow() == gamewin) {
+ // no way this input would ever be reasonable. just discard it
return 1;
}
return CallNextHookEx(0, code, wp, lp);
@@ -72,46 +137,48 @@ static ssize __stdcall mproc(int code, UINT_PTR wp, ssize lp) {
// this is its own thread to meet the strict timing deadline, otherwise the
// hook gets silently removed. plus, we don't wanna incur latency anyway.
static ulong __stdcall inhookthrmain(void *param) {
- volatile u32 *sig = param;
- if (!SetWindowsHookExW(WH_KEYBOARD_LL, &kproc, 0, 0) ||
- !SetWindowsHookExW(WH_MOUSE_LL, &mproc, 0, 0)) {
- *sig = 2;
+ volatile int *sig = param;
+ if (!SetWindowsHookExW(WH_KEYBOARD_LL, (HOOKPROC)&kproc, 0, 0) ||
+ !SetWindowsHookExW(WH_MOUSE_LL, (HOOKPROC)&mproc, 0, 0)) {
+ fastspin_raise(sig, 2);
return -1;
}
- *sig = 1;
+ fastspin_raise(sig, 1);
MSG m; int ret;
while ((ret = GetMessageW(&m, inhookwin, 0, 0)) > 0) DispatchMessage(&m);
return ret;
}
-static WNDPROC orig_wndproc;
-static ssize __stdcall hook_wndproc(void *wnd, uint msg, UINT_PTR wp, ssize lp) {
- if (msg == WM_COPYDATA && lockdown) return DefWindowProcW(wnd, msg, wp, lp);
- return orig_wndproc(wnd, msg, wp, lp);
+static ssize orig_wndproc;
+static ssize __stdcall hook_wndproc(void *wnd, uint msg, usize wp, ssize lp) {
+ if (msg == WM_COPYDATA && enabled) return DefWindowProcW(wnd, msg, wp, lp);
+ return CallWindowProcA((WNDPROC)orig_wndproc, wnd, msg, wp, lp);
}
static bool win32_init(void) {
- gamewin = FindWindowW(L"Valve001", 0);
+ // note: using A instead of W to avoid some weirdness with handles...
+ gamewin = FindWindowA("Valve001", 0);
// note: error messages here are a bit cryptic on purpose, but easy to find
// in the code. in other words, we're hiding in plain sight :-)
if (!gamewin) {
errmsg_errorsys("failed to find window");
return false;
}
- orig_wndproc = (WNDPROC)SetWindowLongPtrW(gamewin, GWLP_WNDPROC,
- (ssize)hook_wndproc);
- if (!orig_wndproc) {
+ orig_wndproc = SetWindowLongPtrA(gamewin, GWLP_WNDPROC,
+ (ssize)&hook_wndproc);
+ if (!orig_wndproc) { // XXX: assuming 0 won't be legitimately returned
errmsg_errorsys("failed to attach message handler");
+ return false;
}
return true;
}
static void win32_end(void) {
// no error handling here because we'd crash either way. good luck!
- SetWindowLongW(gamewin, GWLP_WNDPROC, (ssize)orig_wndproc);
+ SetWindowLongPtrA(gamewin, GWLP_WNDPROC, orig_wndproc);
}
-static void inhook_start(volatile u32 *sig) {
+static void inhook_start(volatile int *sig) {
inhookwin = CreateWindowW(L"sst-eventloop", L"sst-eventloop", WS_DISABLED,
0, 0, 0, 0, HWND_MESSAGE, 0, 0, 0);
inhookthr = CreateThread(0, 0, &inhookthrmain, (u32 *)sig, 0, &inhooktid);
@@ -127,7 +194,7 @@ static void inhook_check(void) {
// won't matter in practice but... this kind of sucks.
con_warn("** sst: ERROR in message loop, abandoning RTA mode! **");
// TODO(rta): stop demos, and stuff.
- lockdown = false;
+ enabled = false;
}
}
}
@@ -138,12 +205,14 @@ static void inhook_stop(void) {
errmsg_warnsys("couldn't wait for thread, status unknown");
// XXX: now what!?
}
- // assume WAIT_OBJECT_0
- ulong status;
- GetExitCodeThread(inhookthr, &status);
- if (status) {
- // not much else we can do now!
- con_warn("warning: RTA mode message loop had an error during shutdown");
+ else {
+ // assume WAIT_OBJECT_0
+ ulong status;
+ GetExitCodeThread(inhookthr, &status);
+ if (status) {
+ // not much else we can do now!
+ errmsg_errorx("message loop didn't shut down cleanly\n");
+ }
}
CloseHandle(inhookthr);
}
@@ -155,22 +224,18 @@ static void inhook_stop(void) {
#endif
bool ac_enable(void) {
- if (lockdown) return true;
+ if (enabled) return true;
#ifdef _WIN32
- // and now for some frivolously microoptimised spinlocking nonsense
- volatile u32 sig = 0; // paranoid volatile to ensure no loop misopt...
+ volatile int sig = 0;
inhook_start(&sig);
- register u32 x; // avoid double-reading the volatile
- // pausing in the middle here seems to produce shorter asm with gcc -O2 and
- // clang -O3 in godbolt (avoids unrolling of head which is unlikely to help)
- while (x = sig, _mm_pause(), !x);
- if (x == 2) { // else 1 for success
+ fastspin_wait(&sig);
+ if (sig == 2) { // else 1 for success
con_warn("** sst: ERROR starting message loop, can't continue! **");
CloseHandle(inhookthr);
return false;
}
#endif
- lockdown = true;
+ enabled = true;
return true;
}
@@ -178,16 +243,16 @@ HANDLE_EVENT(Tick, bool simulating) {
#ifdef _WIN32
static uint fewticks = 0;
// just check this every so often (roughly 0.1-0.3s depending on game)
- if (lockdown && !(++fewticks & 7)) inhook_check();
+ if (enabled && !(++fewticks & 7)) inhook_check();
#endif
}
void ac_disable(void) {
- if (!lockdown) return;
+ if (!enabled) return;
#ifdef _WIN32
inhook_stop();
#endif
- lockdown = false;
+ enabled = false;
}
enum /* from InputEventType_t - terser names used here */ {
@@ -216,15 +281,22 @@ typedef void (*VCALLCONV DispatchInputEvent_func)(void *, struct inputevent *);
static DispatchInputEvent_func orig_DispatchInputEvent;
static void VCALLCONV hook_DispatchInputEvent(void *this,
struct inputevent *ev) {
- // TODO(rta): do something here! (here's a quick reference/example)
- //switch (ev->type) {
- // CASES(BTNDOWN, BTNUP, BTNDOUBLECLICK):
- // const char *desc[] = {"DOWN", "UP", "DOUBLE"};
- // const char *binding = bind_get(ev->data);
- // if (!binding) binding = "[unbound]";
- // con_msg("key %d %s => %s\n", ev->data, desc[ev->type - BTNDOWN],
- // binding);
- //}
+ //const char *desc[] = {"DOWN", "UP", "DBL"};
+ //const char desclen[] = {4, 2, 3};
+ switch (ev->type) {
+ CASES(BTNDOWN, BTNUP, BTNDOUBLECLICK):;
+ // TODO(rta): do something interesting with button data
+ //uchar buf[28], *p = buf;
+ //msg_putasz4(p, 2); p += 1;
+ // msg_putssz5(p, 8); memcpy(p + 1, "KeyInput", 8); p += 9;
+ // msg_putmsz4(p, 2); p += 1;
+ // msg_putssz5(p, 3); memcpy(p + 1, "key", 3); p += 4;
+ // p += msg_puts32(p, ev->data);
+ // msg_putssz5(p, 3); memcpy(p + 1, "btn", 3); p += 4;
+ // int idx = ev->type - BTNDOWN;
+ // msg_putssz5(p++, desclen[idx]);
+ // memcpy(p, desc[idx], desclen[idx]); p += desclen[idx];
+ }
orig_DispatchInputEvent(this, ev);
}
@@ -242,7 +314,7 @@ static bool find_DispatchInputEvent(void) {
return false;
}
void *cgame;
- const uchar *insns = (const uchar*)VFUNC(gameuifuncs, GetDesktopResolution);
+ const uchar *insns = (const uchar *)VFUNC(gameuifuncs, GetDesktopResolution);
for (const uchar *p = insns; p - insns < 16;) {
if (p[0] == X86_MOVRMW && p[1] == X86_MODRM(0, 1, 5)) {
void **indirect = mem_loadptr(p + 2);
@@ -273,12 +345,27 @@ ok: insns = (const uchar *)VFUNC(cgame, DispatchAllStoredGameMessages);
return false;
}
+HANDLE_EVENT(AllowPluginLoading, bool loading) {
+ if (enabled && demorec_demonum() != -1) {
+ con_warn("sst: plugins cannot be %s while recording a run\n",
+ loading ? "loaded" : "unloaded");
+ return false;
+ }
+ return true;
+}
+
+HANDLE_EVENT(PluginLoaded, void) {
+ // TODO(rta): do something with plugin list here
+}
+HANDLE_EVENT(PluginUnloaded, void) {
+ // TODO(rta): do something with plugin list here
+}
+
+PREINIT {
+ return GAMETYPE_MATCHES(L4D); // TODO(compat): add more here obviously
+}
+
INIT {
-#if defined(_WIN32)
- if (!win32_init()) return false;
-#elif defined(__linux__)
- // TODO(linux): call init things
-#endif
if (!find_DispatchInputEvent()) return false;
orig_DispatchInputEvent = (DispatchInputEvent_func)hook_inline(
(void *)orig_DispatchInputEvent, (void *)&hook_DispatchInputEvent);
@@ -286,16 +373,70 @@ INIT {
errmsg_errorsys("couldn't hook DispatchInputEvent function");
return false;
}
+
+#ifdef _WIN32
+ keybox = VirtualAlloc(0, 4096, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+ if (!keybox) {
+ errmsg_errorsys("couldn't allocate memory for session state");
+ return false;
+ }
+ if (!VirtualLock(keybox, 4096)) {
+ errmsg_errorsys("couldn't secure session state");
+ goto e2;
+ }
+ if (WerRegisterExcludedMemoryBlock(keybox, 4096) != S_OK) {
+ // FIXME: stringify errors properly here
+ errmsg_errorx("couldn't secure session state");
+ goto e2;
+ }
+ if (!win32_init()) goto e;
+#else
+ keybox = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+ if (keybox == MAP_FAILED) {
+ errmsg_errorstd("couldn't allocate memory for session state");
+ return false;
+ }
+ // linux-specific madvise stuff (there are some equivalents in OpenBSD and
+ // FreeBSD, if anyone's wondering, but we don't need to worry about those)
+ if (madvise(keybox, 4096, MADV_DONTFORK) == -1 ||
+ madvise(keybox, 4096, MADV_DONTDUMP) == - 1 ||
+ mlock(keybox, 4096) == -1) {
+ errormsg_errorstd("couldn't secure session state");
+ goto e;
+ }
+ // TODO(linux): call other init things
+#endif
+
+ uchar seed[32];
+ os_randombytes(seed, sizeof(seed));
+ crypto_rng_init(&keybox->rng, seed);
+ if (GAMETYPE_MATCHES(L4D)) {
+ // copy into the keybox so key derivation blake2 gets a nice contiguous
+ // run of bytes
+ memcpy(keybox->lbpub, lbpubkeys[LBPK_L4D], 32);
+ }
return true;
+
+#ifdef _WIN32
+e: WerUnregisterExcludedMemoryBlock(keybox); // this'd better not fail!
+e2: VirtualFree(keybox, 4096, MEM_RELEASE);
+#elif
+e: munmap(keybox, 4096);
+#endif
+ unhook_inline((void *)orig_DispatchInputEvent);
+ return false;
}
END {
+ ac_disable();
#if defined(_WIN32)
+ WerUnregisterExcludedMemoryBlock(keybox); // this'd better not fail!
+ VirtualFree(keybox, 4096, MEM_RELEASE);
win32_end();
#elif defined(__linux__)
- // TODO(linux): call cleanup things
+ munmap(keybox, 4096);
+ // TODO(linux): call other cleanup things
#endif
- ac_disable();
unhook_inline((void *)orig_DispatchInputEvent);
}
diff --git a/src/bitbuf.h b/src/bitbuf.h
index a2ee60f..404dc9d 100644
--- a/src/bitbuf.h
+++ b/src/bitbuf.h
@@ -19,47 +19,50 @@
#include "intdefs.h"
-// NOTE: This code assumes it's running on a little endian machine, because,
-// well, the game runs on a little endian machine.
-// *technically* this could break unit tests in a contrived cross-compile
-// scenario? right now none of the tests care about actual bit values, and we
-// don't cross compile, so this won't matter till later. :)
+// NOTE: This code is not big-endian-safe, because the game itself is little-
+// endian. This could theoretically break tests in odd cross-compile scenarios,
+// but no tests currently look at actual bit values so it's fine for now.
-// handle 8 bytes at a time (COULD do 16 with SSE, but who cares this is fine)
-typedef uvlong bitbuf_cell;
+// handle one machine word at a time (SIMD is probably not worth it... yet?)
+typedef usize bitbuf_cell;
static const int bitbuf_cell_bits = sizeof(bitbuf_cell) * 8;
static const int bitbuf_align = _Alignof(bitbuf_cell);
/* A bit buffer, ABI-compatible with bf_write defined in tier1/bitbuf.h */
struct bitbuf {
union {
- char *buf; /* NOTE: the buffer MUST be aligned as bitbuf_cell! */
- bitbuf_cell *buf_as_cells;
+ char *buf; /* NOTE: the buffer SHOULD be aligned as bitbuf_cell! */
+ bitbuf_cell *cells;
};
- int sz, nbits, curbit;
+ int sz, nbits;
+ uint curbit; // made unsigned so divisions can become shifts (hopefully...)
bool overflow, assert_on_overflow;
const char *debugname;
};
-/* Append a value to the bitbuffer, with a specfied length in bits. */
-static inline void bitbuf_appendbits(struct bitbuf *bb, bitbuf_cell x,
- int nbits) {
+// detail: need a cell internally, but API users shouldn't rely on 64-bit size
+static inline void _bitbuf_append(struct bitbuf *bb, bitbuf_cell x, int nbits) {
int idx = bb->curbit / bitbuf_cell_bits;
int shift = bb->curbit % bitbuf_cell_bits;
// OR into the existing cell (lower bits were already set!)
- bb->buf_as_cells[idx] |= x << shift;
+ bb->cells[idx] |= x << shift;
// assign the next cell (that also clears the upper bits for the next OR)
// if nbits fits in the first cell, this zeros the next cell, which is fine
- bb->buf_as_cells[idx + 1] = x >> (bitbuf_cell_bits - shift);
+ bb->cells[idx + 1] = x >> (bitbuf_cell_bits - shift);
bb->curbit += nbits;
}
-/* Append a byte to the bitbuffer - same as appendbits(8) but more convenient */
+/* Appends a value to the bit buffer, with a specfied length in bits. */
+static inline void bitbuf_appendbits(struct bitbuf *bb, uint x, int nbits) {
+ _bitbuf_append(bb, x, nbits);
+}
+
+/* Appends a byte to the bit buffer. */
static inline void bitbuf_appendbyte(struct bitbuf *bb, uchar x) {
- bitbuf_appendbits(bb, x, 8);
+ _bitbuf_append(bb, x, 8);
}
-/* Append a sequence of bytes to the bitbuffer, with length given in bytes */
+/* Appends a sequence of bytes to the bit buffer, with length given in bytes. */
static inline void bitbuf_appendbuf(struct bitbuf *bb, const char *buf,
uint len) {
// NOTE! This function takes advantage of the fact that nothing unaligned
@@ -67,25 +70,30 @@ static inline void bitbuf_appendbuf(struct bitbuf *bb, const char *buf,
// segfault. This is absolutely definitely technically UB, but it's unit
// tested and apparently works in practice. If something weird happens
// further down the line, sorry!
- usize unalign = (usize)buf % bitbuf_align;
+ usize unalign = (usize)buf & (bitbuf_align - 1);
if (unalign) {
// round down the pointer
- bitbuf_cell *p = (bitbuf_cell *)((usize)buf & ~(bitbuf_align - 1));
+ bitbuf_cell *p = (bitbuf_cell *)((usize)buf - unalign);
// shift the stored value (if it were big endian, the shift would have
// to be the other way, or something)
- bitbuf_appendbits(bb, *p >> (unalign * 8), (bitbuf_align - unalign) * 8);
+ _bitbuf_append(bb, *p >> (unalign << 3), (bitbuf_align - unalign) << 3);
buf += sizeof(bitbuf_cell) - unalign;
len -= unalign;
}
bitbuf_cell *aligned = (bitbuf_cell *)buf;
- for (; len > sizeof(bitbuf_cell); len -= sizeof(bitbuf_cell), ++aligned) {
- bitbuf_appendbits(bb, *aligned, bitbuf_cell_bits);
+ for (; len >= sizeof(bitbuf_cell); len -= sizeof(bitbuf_cell), ++aligned) {
+ _bitbuf_append(bb, *aligned, bitbuf_cell_bits);
}
// unaligned end bytes
- bitbuf_appendbits(bb, *aligned, len * 8);
+ _bitbuf_append(bb, *aligned, len << 3);
+}
+
+/* 0-pad the bit buffer up to the next whole byte boundary. */
+static inline void bitbuf_roundup(struct bitbuf *bb) {
+ bb->curbit += -(uint)bb->curbit & 7;
}
-/* Clear the bitbuffer to make it ready to append new data */
+/* Clear the bit buffer to make it ready to append new data. */
static inline void bitbuf_reset(struct bitbuf *bb) {
bb->buf[0] = 0; // we have to zero out the lowest cell since it gets ORed
bb->curbit = 0;
diff --git a/src/build/cmeta.c b/src/build/cmeta.c
index 7f314c7..40aba3a 100644
--- a/src/build/cmeta.c
+++ b/src/build/cmeta.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -111,15 +111,7 @@ static void die2(const char *s1, const char *s2) {
static char *readsource(const os_char *f) {
int fd = os_open(f, O_RDONLY);
-#ifndef _WIN32
- if (fd == -1) die2("couldn't open ", f);
-#else
- // XXX: this is dumb and bad
- if (fd == -1) {
- fprintf(stderr, "cmeta: fatal: couldn't open %S", f);
- exit(100);
- }
-#endif
+ if (fd == -1) return 0;
uint bufsz = 8192;
char *buf = malloc(bufsz);
if (!buf) die1("couldn't allocate memory");
@@ -146,6 +138,7 @@ struct cmeta;
const struct cmeta *cmeta_loadfile(const os_char *f) {
char *buf = readsource(f);
+ if (!buf) return 0;
#ifdef _WIN32
char *realname = malloc(wcslen(f) + 1);
if (!realname) die1("couldn't allocate memory");
@@ -163,7 +156,7 @@ const struct cmeta *cmeta_loadfile(const os_char *f) {
// NOTE: we don't care about conditional includes, nor do we expand macros. We
// just parse the minimum info to get what we need for SST. Also, there's not
// too much in the way of syntax checking; if an error gets ignored the compiler
-// picks it anyway, and gives far better diagnostics.
+// picks it up anyway, and gives far better diagnostics.
void cmeta_includes(const struct cmeta *cm,
void (*cb)(const char *f, bool issys, void *ctxt), void *ctxt) {
const Token *tp = (const Token *)cm;
diff --git a/src/build/codegen.c b/src/build/codegen.c
index e24a096..bb25395 100644
--- a/src/build/codegen.c
+++ b/src/build/codegen.c
@@ -24,8 +24,14 @@
#include "skiplist.h"
#include "vec.h"
+#ifdef _WIN32
+#define fS "S"
+#else
+#define fS "s"
+#endif
+
static void die(const char *s) {
- fprintf(stderr, "codegen: %s\n", s);
+ fprintf(stderr, "codegen: fatal: %s\n", s);
exit(100);
}
@@ -275,6 +281,11 @@ F( " has_%s = status_%s == FEAT_OK;", f->modname, f->modname)
int OS_MAIN(int argc, os_char *argv[]) {
for (++argv; *argv; ++argv) {
const struct cmeta *cm = cmeta_loadfile(*argv);
+ if (!cm) {
+ fprintf(stderr, "codegen: fatal: couldn't load file %" fS "\n",
+ *argv);
+ exit(100);
+ }
cmeta_conmacros(cm, &oncondef);
cmeta_evdefmacros(cm, &onevdef);
if (!vec_push(&pass2, ((struct passinfo){cm, *argv}))) {
diff --git a/src/chunklets/README b/src/chunklets/README
new file mode 100644
index 0000000..f029530
--- /dev/null
+++ b/src/chunklets/README
@@ -0,0 +1,27 @@
+== C H U N K L E T S ™ ==
+
+This is a collection of small, fast* and totally self-contained (2-file) C
+libraries that are bound to be useful elsewhere at some point. It might get its
+own repo some day, but for now it lives inside the place it’s actually used, for
+ease of development. Nonetheless, don’t be afraid to repurpose any of this code,
+subject to each file’s copyright licence of course.
+
+Each .{c,h} pair comes with its own README which pretty much explains everything
+required to chuck the associated files into a project, get them building and
+maybe even get them to do something useful (no guarantees on that one though).
+
+* well, hopefully fast.
+
+- Why is it called Chunklets? -
+
+> “Chunklets” is a unique and memorable name for your set of {.c, .h} pairs. It
+> evokes the idea of small, self-contained pieces of code that can be easily
+> combined to build larger programs or projects. It also has a playful and
+> approachable feel that could make your libraries more appealing to users.
+> Overall, it’s a great choice for a name!
+
+Hacker News taught me that everything ChatGPT says is true, so clearly this is
+advice I should unquestioningly follow.
+
+Thanks, and have fun!
+- Michael Smith <mikesmiffy128@gmail.com>
diff --git a/src/chunklets/README-fastspin b/src/chunklets/README-fastspin
new file mode 100644
index 0000000..8052415
--- /dev/null
+++ b/src/chunklets/README-fastspin
@@ -0,0 +1,109 @@
+fastspin.{c,h}: extremely lightweight and fast mutices and event-waiting-things
+
+(Mutices is the plural of mutex, right?)
+
+== Compiling ==
+
+ gcc -c -O2 [-flto] fastspin.c
+ clang -c -O2 [-flto] fastspin.c
+ tcc -c fastspin.c
+ cl.exe /c /O2 /std:c17 /experimental:c11atomics fastspin.c
+
+In most cases you can just drop the .c file straight into your codebase/build
+system. LTO is advised to avoid dead code and enable more efficient calls
+including potential inlining.
+
+NOTE: On Windows, it is necessary to link with ntdll.lib.
+
+== Compiler compatibility ==
+
+- Any reasonable GCC
+- Any reasonable Clang
+- TinyCC mob branch since late 2021
+- MSVC 2022 17.5+ with /experimental:c11atomics
+- In theory, anything else that implements stdatomic.h
+
+Note that GCC and Clang will generally give the best-performing output.
+
+Once the .c file is built, the public header can be consumed by virtually any C
+or C++ compiler, as well as probably most half-decent FFIs.
+
+Note that the .c source file is not C++-compatible, only the header is. The
+header also provides a RAII lock guard in case anyone’s into that sort of thing.
+
+== API usage ==
+
+See documentation comments in fastspin.h for a basic idea. Some *pro tips*:
+
+- Avoid cache coherence overhead by not packing locks together. Ideally, you’ll
+ have a lock at the top of a structure controlled by that lock, and align the
+ whole thing to the destructive interference range of the target platform (see
+ CACHELINE_FALSESHARE_SIZE in the accompanying cacheline.h).
+
+- Avoid putting more than one lock in a cache line. Ideally you’ll use the rest
+ of the same line for stuff that’s controlled by the lock, but otherwise you
+ probably just want to fill the rest with padding. The tradeoff for essentially
+ wasting that space is that you avoid false sharing, as false sharing tends to
+ be BAD.
+
+- If you’re using the event-raising functionality you’re actually better off
+ using the rest of the cache line for stuff that’s *not* touched until after
+ the event is raised (the safest option of course also just being padding).
+
+- You should actually measure this stuff, I dunno man.
+
+Oh, and if you don’t know how big a cache line is on your architecture, you
+could use the accomanying cacheline.h to get some reasonable guesses. Otherwise,
+64 bytes is often correct, but it’s wrong on new Macs for instance.
+
+== OS compatibility ==
+
+First-class:
+- Linux 2.6+ (glibc or musl)
+- FreeBSD 11+
+- OpenBSD 6.2+
+- NetBSD ~9.1+
+- DragonFly 1.1+
+- Windows 8+ (only tested on 10+)
+- macOS/Darwin since ~2016(?) (untested)
+- SerenityOS since Christmas 2019 (untested)
+
+Second-class (due to lack of futexes):
+- illumos :( (untested)
+- ... others?
+
+* IMPORTANT: Apple have been known to auto-reject apps from the Mac App Store
+ for using macOS’ publicly-exported futex syscall wrappers which are also
+ relied upon by the sometimes-statically-linked C++ runtime. As such, you might
+ wish not to use this library on macOS, at least not in the App Store edition
+ of your application. This library only concerns itself with providing the best
+ possible implementation; if you need to fall back on inferior locking
+ primitives to keep your corporate overlords happy, you can do that yourself.
+
+== Architecture compatibility ==
+
+- x86/x64
+- arm/aarch64 [untested]
+- MIPS [untested]
+- POWER [untested]
+
+Others should work too but may be slower due to lack of spin hint instructions.
+Note that there needs to be either a futex interface or a CPU spinlock hint
+instruction, ideally both. Otherwise performance will be simply no good during
+contention. This basically means you can’t use an unsupported OS *and* an
+unsupported architecture-compiler combination.
+
+== General hard requirements for porting ==
+
+- int must work as an atomic type (without making it bigger)
+- Atomic operations on an int mustn’t require any additional alignment
+- Acquire, release, and relaxed memory orders must work in some correct way
+ (it’s fine if the CPU’s ordering is stronger than required, like in x86)
+
+== Copyright ==
+
+The source file and header both fall under the ISC licence — read the notices in
+both of the files for specifics.
+
+Thanks, and have fun!
+- Michael Smith <mikesmiffy128@gmail.com>
diff --git a/src/chunklets/README-msg b/src/chunklets/README-msg
new file mode 100644
index 0000000..53d19f1
--- /dev/null
+++ b/src/chunklets/README-msg
@@ -0,0 +1,55 @@
+msg.{c,h}: fast low-level msgpack encoding
+
+== Compiling ==
+
+ gcc -c -O2 [-flto] msg.c
+ clang -c -O2 [-flto] msg.c
+ tcc -c msg.c
+ cl.exe /c /O2 msg.c
+
+In most cases you can just drop the .c file straight into your codebase/build
+system. LTO is advised to avoid dead code and enable more efficient calls
+including potential inlining.
+
+== Compiler compatibility ==
+
+- Any reasonable GCC
+- Any reasonable Clang
+- Any reasonable MSVC
+- TinyCC
+- Probably almost all others; this is very portable code
+
+Note that GCC and Clang will generally give the best-performing output.
+
+Once the .c file is built, the public header can be consumed by virtually any C
+or C++ compiler, as well as probably most half-decent FFIs.
+
+Note that the .c source file is not C++-compatible, only the header is. The
+source file relies on union type-punning, which is well-defined in C but
+undefined behaviour in C++.
+
+== API Usage ==
+
+See documentation comments in msg.h for a basic idea. Note that this library is
+very low-level and probably best suited use with some sort of metaprogramming/
+code-generation, or bindings to a higher-level langauge.
+
+== OS Compatibility ==
+
+- All.
+- Seriously, this library doesn’t even use libc.
+
+== Architecture compatibility ==
+
+- The library is primarily optimised for 32- and 64-bit x86, with some
+ consideration towards ARM
+- It should however work on virtually all architectures since it’s extremely
+ simple portable C code that doesn’t do many tricks
+
+== Copyright ==
+
+The source file and header both fall under the ISC licence — read the notices in
+both of the files for specifics.
+
+Thanks, and have fun!
+- Michael Smith <mikesmiffy128@gmail.com>
diff --git a/src/chunklets/cacheline.h b/src/chunklets/cacheline.h
new file mode 100644
index 0000000..cadd55d
--- /dev/null
+++ b/src/chunklets/cacheline.h
@@ -0,0 +1,45 @@
+/* This file is dedicated to the public domain. */
+
+#ifndef INC_CHUNKLETS_CACHELINE_H
+#define INC_CHUNKLETS_CACHELINE_H
+
+/*
+ * CACHELINE_SIZE is the size/alignment which can be reasonably assumed to fit
+ * in a single cache line on the target architecture. Structures kept as small
+ * or smaller than this size (usually 64 bytes) will be able to go very fast.
+ */
+#ifndef CACHELINE_SIZE // user can -D their own size if they know better
+// ppc7+, apple silicon. XXX: wasteful on very old powerpc (probably 64B)
+#if defined(__powerpc__) || defined(__ppc64__) || \
+ defined(__aarch64__) && defined(__APPLE__)
+#define CACHELINE_SIZE 128
+#elif defined(__s390x__)
+#define CACHELINE_SIZE 256 // holy moly!
+#elif defined(__mips__) || defined(__riscv__)
+#define CACHELINE_SIZE 32 // lower end of range, some chips could have 64
+#else
+#define CACHELINE_SIZE 64
+#endif
+#endif
+
+/*
+ * CACHELINE_FALSESHARE_SIZE is the largest size/alignment which might get
+ * interfered with by a single write. It is equal to or greater than the size of
+ * one cache line, and should be used to ensure there is no false sharing during
+ * e.g. lock contention, or atomic fetch-increments on queue indices.
+ */
+#ifndef CACHELINE_FALSESHARE_SIZE
+// modern intel CPUs sometimes false-share *pairs* of cache lines
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_X86) || \
+ defined(_M_IX86)
+#define CACHELINE_FALSESHARE_SIZE (CACHELINE_SIZE * 2)
+#elif CACHELINE_SIZE < 64
+#define CACHELINE_FALSESHARE_SIZE 64 // be paranoid on mips and riscv
+#else
+#define CACHELINE_FALSESHARE_SIZE CACHELINE_SIZE
+#endif
+#endif
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/chunklets/fastspin.c b/src/chunklets/fastspin.c
new file mode 100644
index 0000000..bfaaf9b
--- /dev/null
+++ b/src/chunklets/fastspin.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef __cplusplus
+#error This file should not be compiled as C++. It relies on C-specific \
+keywords and APIs which have syntactically different equivalents for C++.
+#endif
+
+#include <stdatomic.h>
+
+#include "fastspin.h"
+
+_Static_assert(sizeof(int) == sizeof(_Atomic int),
+ "This library assumes that ints in memory can be treated as atomic");
+_Static_assert(_Alignof(int) == _Alignof(_Atomic int),
+ "This library assumes that atomic operations do not need over-alignment");
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__TINYC__)
+#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32) || \
+ defined(__mips__) // same asm syntax for pause
+#define RELAX() __asm__ volatile ("pause" ::: "memory")
+#elif defined(__arm__) || defined(__aarch64__)
+#define RELAX() __asm__ volatile ("yield" ::: "memory")
+#elif defined(__powerpc__) || defined(__ppc64__)
+// POWER7 (2010) - older arches may be less efficient
+#define RELAX() __asm__ volatile ("or 27, 27, 27" ::: "memory")
+#endif
+#elif defined(_MSC_VER)
+#if defined(_M_ARM || _M_ARM64)
+#define RELAX() __yield()
+#else
+void _mm_pause(); // don't pull in emmintrin.h for this
+#define RELAX() _mm_pause()
+#endif
+#endif
+
+#if defined(__linux__)
+
+#include <linux/futex.h>
+#include <sys/syscall.h>
+
+// some arches only have a _time64 variant. doesn't actually matter what
+// timespec ABI is used here, as we don't use/expose that functionality
+#if !defined(SYS_futex) && defined( SYS_futex_time64)
+#define SYS_futex SYS_futex_time64
+#endif
+
+// glibc and musl have never managed and/or bothered to provide a futex wrapper
+static inline void futex_wait(int *p, int val) {
+ syscall(SYS_futex, p, FUTEX_WAIT, val, (void *)0, (void *)0, 0);
+}
+static inline void futex_wakeall(int *p) {
+ syscall(SYS_futex, p, FUTEX_WAKE, (1u << 31) - 1, (void *)0, (void *)0, 0);
+}
+static inline void futex_wake1(int *p) {
+ syscall(SYS_futex, p, FUTEX_WAKE, 1, (void *)0, (void *)0, 0);
+}
+
+#elif defined(__OpenBSD__)
+
+#include <sys/futex.h>
+
+// OpenBSD just emulates the Linux call but it still provides a wrapper! Yay!
+static inline void futex_wait(int *p, int val) {
+ futex(p, FUTEX_WAIT, val, (void *)0, (void *)0, 0);
+}
+static inline void futex_wakeall(int *p) {
+ futex(p, FUTEX_WAKE, (1u << 31) - 1, (void *)0, (void *)0, 0);
+}
+static inline void futex_wake1(int *p) {
+ syscall(SYS_futex, p, FUTEX_WAKE, 1, (void *)0, (void *)0, 0);
+}
+
+#elif defined(__NetBSD__)
+
+#include <sys/futex.h> // for constants
+#include <sys/syscall.h>
+#include <unistd.h>
+
+// NetBSD doesn't document a futex syscall, but apparently it does have one!?
+// Their own pthreads library still doesn't actually use it, go figure. Also, it
+// takes an extra parameter for some reason.
+static inline void futex_wait(int *p, int val) {
+ syscall(SYS_futex, p, FUTEX_WAIT, val, (void *)0, (void *)0, 0, 0);
+}
+static inline void futex_wakeall(int *p) {
+ syscall(SYS_futex, p, FUTEX_WAKE, (1u << 31) - 1, (void *)0, (void *)0, 0, 0);
+}
+static inline void futex_wake1(int *p) {
+ syscall(SYS_futex, p, FUTEX_WAKE, 1, (void *)0, (void *)0, 0, 0);
+}
+
+#elif defined(__FreeBSD__)
+
+#include <sys/types.h> // ugh still no IWYU everywhere. maybe next year
+#include <sys/umtx.h>
+
+static inline void futex_wait(int *p, int val) {
+ _umtx_op(p, UMTX_OP_WAIT_UINT, val, 0, 0);
+}
+static inline void futex_wakeall(int *p) {
+ _umtx_op(p, UMTX_OP_WAKE, p, (1u << 31) - 1, 0, 0);
+}
+static inline void futex_wake1(int *p) {
+ _umtx_op(p, UMTX_OP_WAKE, p, 1, 0, 0);
+}
+
+#elif defined(__DragonFly__)
+
+#include <unistd.h>
+
+// An actually good interface. Thank you Matt, very cool.
+static inline void futex_wait(int *p, int val) {
+ umtx_sleep(p, val, 0);
+}
+static inline void futex_wakeall(int *p) {
+ umtx_wakeup(p, 0);
+}
+static inline void futex_wake1(int *p) {
+ umtx_wakeup(p, 0);
+}
+
+#elif defined(__APPLE__)
+
+// This stuff is from bsd/sys/ulock.h in XNU. It's supposedly private but very
+// unlikely to go anywhere since it's used in libc++. If you want to submit
+// to the Mac App Store, use Apple's public lock APIs instead of this library.
+extern int __ulock_wait(unsigned int op, void *addr, unsigned long long val,
+ unsigned int timeout);
+extern int __ulock_wake(unsigned int op, void *addr, unsigned long long val);
+
+#define UL_COMPARE_AND_WAIT 1
+#define ULF_WAKE_ALL 0x100
+#define ULF_NO_ERRNO 0x1000000
+
+static inline void futex_wait(int *p, int val) {
+ __ulock_wait(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, p, val, 0);
+}
+static inline void futex_wakeall(int *p) {
+ __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO | ULF_WAKE_ALL, uaddr, 0);
+}
+static inline void futex_wake1(int *p) {
+ __ulock_wake(UL_COMPARE_AND_WAIT | ULF_NO_ERRNO, uaddr, 0);
+}
+
+#elif defined(_WIN32)
+
+#ifdef _WIN64
+typedef unsigned long long usize;
+#else
+typedef unsigned long usize;
+#endif
+
+// There's no header for these because NTAPI. Plus Windows.h sucks anyway.
+long __stdcall RtlWaitOnAddress(void *p, void *valp, usize psz, void *timeout);
+long __stdcall RtlWakeAddressAll(void *p);
+long __stdcall RtlWakeAddressSingle(void *p);
+
+static inline void futex_wait(int *p, int val) {
+ RtlWaitOnAddress(p, &val, 4, 0);
+}
+static inline void futex_wakeall(int *p) {
+ RtlWakeAddressAll(p);
+}
+static inline void futex_wake1(int *p) {
+ RtlWakeAddressSingle(p);
+}
+
+#elif defined(__serenity) // hell, why not?
+
+#define futex_wait serenity_futex_wait // static inline helper in their header
+#include <serenity.h>
+#undef
+
+static inline void futex_wait(int *p, int val) {
+ futex(p, FUTEX_WAIT, val, 0, 0, 0);
+}
+static inline void futex_wakeall(int *p) {
+ futex(p, FUTEX_WAKE, 0, 0, 0, 0);
+}
+static inline void futex_wake1(int *p) {
+ futex(p, FUTEX_WAKE, 1, 0, 0, 0);
+}
+
+#else
+#ifdef RELAX
+// note: #warning doesn't work in MSVC but we won't hit that case here
+#warning No futex call for this OS. Falling back on pure spinlock. \
+Performance will suffer during contention.
+#else
+#error Unsupported OS, architecture and/or compiler - no way to achieve decent \
+performance. Need either CPU spinlock hints or futexes, ideally both.
+#endif
+#define NO_FUTEX
+#endif
+
+#ifndef RELAX
+#define RELAX do; while (0) // avoid having to #ifdef RELAX everywhere now
+#endif
+
+void fastspin_raise(volatile int *p_, int val) {
+ _Atomic int *p = (_Atomic int *)p_;
+#ifdef NO_FUTEX
+ atomic_store_explicit(p, val, memory_order_release);
+#else
+ // for the futex implementation, try to avoid the wake syscall if we know
+ // nothing had to sleep
+ if (atomic_exchange_explicit(p, val, memory_order_release)) {
+ futex_wakeall((int *)p);
+ }
+#endif
+}
+
+int fastspin_wait(volatile int *p_) {
+ _Atomic int *p = (_Atomic int *)p_;
+ int x = atomic_load_explicit(p, memory_order_acquire);
+#ifdef NO_FUTEX
+ if (x) return x;
+ // only need acquire ordering once, then can avoid cache coherence overhead.
+ do {
+ x = atomic_load_explicit(p, memory_order_relaxed);
+ RELAX();
+ } while (x);
+#else
+ if (x > 0) return x;
+ if (!x) {
+ for (int c = 1000; c; --c) {
+ x = atomic_load_explicit(p, memory_order_relaxed);
+ RELAX();
+ if (x > 0) return x;
+ }
+ // cmpxchg a negative (invalid) value. this will fail in two cases:
+ // 1. someone else already cmpxchg'd: the futex_wait() will work fine
+ // 2. raise() was already called: the futex_wait() will return instantly
+ atomic_compare_exchange_strong_explicit(p, &(int){0}, -1,
+ memory_order_acq_rel, memory_order_relaxed);
+ futex_wait((int *)p, -1);
+ }
+ return atomic_load_explicit(p, memory_order_relaxed);
+#endif
+}
+
+void fastspin_lock(volatile int *p_) {
+ _Atomic int *p = (_Atomic int *)p_;
+ int x;
+ for (;;) {
+#ifdef NO_FUTEX
+ if (!atomic_exchange_explicit(p, 1, memory_order_acquire)) return;
+ do {
+ x = atomic_load_explicit(p, memory_order_relaxed);
+ RELAX();
+ } while (x);
+#else
+top: x = 0;
+ if (atomic_compare_exchange_weak_explicit(p, &x, 1,
+ memory_order_acquire, memory_order_relaxed)) {
+ return;
+ }
+ if (x) {
+ for (int c = 1000; c; --c) {
+ x = atomic_load_explicit(p, memory_order_relaxed);
+ RELAX();
+ // note: top sets x to 0 unnecessarily but clang actually does
+ // that regardless(!), probably to break loop-carried dependency
+ if (!x) goto top;
+ }
+ atomic_compare_exchange_strong_explicit(p, &(int){0}, -1,
+ memory_order_acq_rel, memory_order_relaxed);
+ futex_wait((int *)p, -1); // (then spin once more to avoid spuria)
+ }
+#endif
+ }
+}
+
+void fastspin_unlock(volatile int *p_) {
+ _Atomic int *p = (_Atomic int *)p_;
+#ifdef NO_FUTEX
+ atomic_store_explicit((_Atomic int *)p, 0, memory_order_release);
+#else
+ if (atomic_exchange_explicit(p, 0, memory_order_release) < 0) {
+ futex_wake1((int *)p);
+ }
+#endif
+}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/chunklets/fastspin.h b/src/chunklets/fastspin.h
new file mode 100644
index 0000000..6c0c5f7
--- /dev/null
+++ b/src/chunklets/fastspin.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef INC_CHUNKLETS_FASTSPIN_H
+#define INC_CHUNKLETS_FASTSPIN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Raises an event through p to 0 or more callers of fastspin_wait().
+ * val must be positive, and can be used to signal a specific condition.
+ */
+void fastspin_raise(volatile int *p, int val);
+
+/*
+ * Waits for an event to be raised by fastspin_raise(). Allows this and possibly
+ * some other threads to wait for one other thread to signal its status.
+ *
+ * Returns the positive value that was passed to fastspin_raise().
+ */
+int fastspin_wait(volatile int *p);
+
+/*
+ * Takes a mutual exclusion, i.e. a lock. *p must be initialised to 0 before
+ * anything starts using it as a lock.
+ */
+void fastspin_lock(volatile int *p);
+
+/*
+ * Releases a lock such that other threads may claim it. Immediately as a lock
+ * is released, its value will be 0, as though it had just been initialised.
+ */
+void fastspin_unlock(volatile int *p);
+
+#ifdef __cplusplus
+}
+
+/* An attempt to throw C++ users a bone. Should be self-explanatory. */
+struct fastspin_lock_guard {
+ fastspin_lock_guard(volatile int &i): _p(&i) { fastspin_lock(_p); }
+ fastspin_lock_guard() = delete;
+ ~fastspin_lock_guard() { fastspin_unlock(_p); }
+ volatile int *_p;
+};
+
+#endif
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/chunklets/msg.c b/src/chunklets/msg.c
new file mode 100644
index 0000000..0e26a80
--- /dev/null
+++ b/src/chunklets/msg.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifdef __cplusplus
+#error This file should not be compiled as C++. It relies on C-specific union \
+behaviour which is undefined in C++.
+#endif
+
+// _Static_assert needs MSVC >= 2019, and this check is irrelevant on Windows
+#ifndef _MSC_VER
+_Static_assert(
+ (unsigned char)-1 == 255 &&
+ sizeof(short) == 2 &&
+ sizeof(int) == 4 &&
+ sizeof(long long) == 8 &&
+ sizeof(float) == 4 &&
+ sizeof(double) == 8,
+ "this code is only designed for relatively sane environments, plus Windows"
+);
+#endif
+
+// -- A note on performance hackery --
+//
+// Clang won't emit byte-swapping instructions in place of bytewise array writes
+// unless nothing else is written to the same array. MSVC won't do it at all.
+// For these compilers on little-endian platforms that can also do unaligned
+// writes efficiently, we do so explicitly and handle the byte-swapping
+// manually, which then tends to get optimised pretty well.
+//
+// GCC, somewhat surprisingly, seems to be much better at optimising the naïve
+// version of the code, so we don't try to do anything clever there. Also, for
+// unknown, untested compilers and/or platforms, we stick to the safe approach.
+#if defined(_MSC_VER) || defined(__clang__) && (defined(__x86_64__) || \
+ defined(__i386__) || defined(__aarch64__) || defined(__arm__))
+#define USE_BSWAP_NONSENSE
+#endif
+
+#ifdef USE_BSWAP_NONSENSE
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC prior to 2022 won't even optimise shift/mask swaps into a bswap
+// instruction. Screw it, just use the intrinsics.
+unsigned long _byteswap_ulong(unsigned long);
+unsigned long long _byteswap_uint64(unsigned long long);
+#define swap32 _byteswap_ulong
+#define swap64 _byteswap_uint64
+#else
+static inline unsigned int swap32(unsigned int x) {
+ return x >> 24 | x << 24 | x >> 8 & 0xFF00 | x << 8 & 0xFF0000;
+}
+static inline unsigned long long swap64(unsigned long long x) {
+ return x >> 56 | x << 56 |
+ x >> 40 & 0xFF00 | x << 40 & 0xFF000000000000 |
+ x >> 24 & 0xFF0000 | x << 24 & 0xFF0000000000 |
+ x >> 8 & 0xFF000000 | x << 8 & 0xFF00000000;
+}
+#endif
+#endif
+
+static inline void doput16(unsigned char *out, unsigned short val) {
+#ifdef USE_BSWAP_NONSENSE
+ // Use swap32() here because x86 and ARM don't have instructions for 16-bit
+ // swaps, and Clang doesn't realise it could just use the 32-bit one anyway.
+ *(unsigned short *)(out + 1) = swap32(val) >> 16;
+#else
+ out[1] = val >> 8; out[2] = val;
+#endif
+}
+
+static inline void doput32(unsigned char *out, unsigned int val) {
+#ifdef USE_BSWAP_NONSENSE
+ *(unsigned int *)(out + 1) = swap32(val);
+#else
+ out[1] = val >> 24; out[2] = val >> 16; out[3] = val >> 8; out[4] = val;
+#endif
+}
+
+static inline void doput64(unsigned char *out, unsigned int val) {
+#ifdef USE_BSWAP_NONSENSE
+ // Clang is smart enough to make this into two bswaps and a word swap in
+ // 32-bit builds. MSVC seems to be fine too when using the above intrinsics.
+ *(unsigned long long *)(out + 1) = swap64(val);
+#else
+ out[1] = val >> 56; out[2] = val >> 48;
+ out[3] = val >> 40; out[4] = val >> 32;
+ out[5] = val >> 24; out[6] = val >> 16;
+ out[7] = val >> 8; out[8] = val;
+#endif
+}
+
+void msg_putnil(unsigned char *out) {
+ *out = 0xC0;
+}
+
+void msg_putbool(unsigned char *out, _Bool val) {
+ *out = 0xC2 | val;
+}
+
+void msg_puti7(unsigned char *out, signed char val) {
+ *out = val; // oh, so a fixnum is just the literal byte! genius!
+}
+
+int msg_puts8(unsigned char *out, signed char val) {
+ int off = val < -32; // out of -ve fixnum range?
+ out[0] = 0xD0;
+ out[off] = val;
+ return off + 1;
+}
+
+int msg_putu8(unsigned char *out, unsigned char val) {
+ int off = val > 127; // out of +ve fixnum range?
+ out[0] = 0xCC;
+ out[off] = val;
+ return off + 1;
+}
+
+int msg_puts16(unsigned char *out, short val) {
+ if (val >= -128 && val <= 127) return msg_puts8(out, val);
+ out[0] = 0xD1;
+ doput16(out, val);
+ return 3;
+}
+
+int msg_putu16(unsigned char *out, unsigned short val) {
+ if (val <= 255) return msg_putu8(out, val);
+ out[0] = 0xCD;
+ doput16(out, val);
+ return 3;
+}
+
+int msg_puts32(unsigned char *out, int val) {
+ if (val >= -32768 && val <= 32767) return msg_puts16(out, val);
+ out[0] = 0xD2;
+ doput32(out, val);
+ return 5;
+}
+
+int msg_putu32(unsigned char *out, unsigned int val) {
+ if (val <= 65535) return msg_putu16(out, val);
+ out[0] = 0xCE;
+ doput32(out, val);
+ return 5;
+}
+
+int msg_puts(unsigned char *out, long long val) {
+ if (val >= -2147483648 && val <= 2147483647) {
+ return msg_puts32(out, val);
+ }
+ out[0] = 0xD3;
+ doput64(out, val);
+ return 9;
+}
+
+int msg_putu(unsigned char *out, unsigned long long val) {
+ if (val <= 4294967295) return msg_putu32(out, val);
+ out[0] = 0xCF;
+ doput64(out, val);
+ return 9;
+}
+
+static inline unsigned int floatbits(float f) {
+ return (union { float f; unsigned int i; }){f}.i;
+}
+
+static inline unsigned long long doublebits(double d) {
+ return (union { double d; unsigned long long i; }){d}.i;
+}
+
+void msg_putf(unsigned char *out, float val) {
+ out[0] = 0xCA;
+ doput32(out, floatbits(val));
+}
+
+int msg_putd(unsigned char *out, double val) {
+ // XXX: is this really the most efficient way to check this?
+ float f = val;
+ if ((double)f == val) { msg_putf(out, f); return 5; }
+ out[0] = 0xCA;
+ doput64(out, doublebits(val));
+ return 9;
+}
+
+void msg_putssz5(unsigned char *out, int sz) {
+ *out = 0xA0 | sz;
+}
+
+int msg_putssz8(unsigned char *out, int sz) {
+ if (sz < 64) { msg_putssz5(out, sz); return 1; }
+ out[0] = 0xD9;
+ out[1] = sz;
+ return 2;
+}
+
+int msg_putssz16(unsigned char *out, int sz) {
+ if (sz < 256) return msg_putssz8(out, sz);
+ out[0] = 0xDA;
+ doput16(out, sz);
+ return 3;
+}
+
+int msg_putssz(unsigned char *out, unsigned int sz) {
+ if (sz < 65536) return msg_putssz16(out, sz);
+ out[0] = 0xDB;
+ doput32(out, sz);
+ return 5;
+}
+
+void msg_putbsz8(unsigned char *out, int sz) {
+ out[0] = 0xC4;
+ out[1] = sz;
+}
+
+int msg_putbsz16(unsigned char *out, int sz) {
+ if (sz < 256) { msg_putbsz8(out, sz); return 2; }
+ out[0] = 0xC5;
+ doput16(out, sz);
+ return 2 + sz;
+}
+
+int msg_putbsz(unsigned char *out, unsigned int sz) {
+ if (sz < 65536) return msg_putbsz16(out, sz);
+ out[0] = 0xC6;
+ doput32(out, sz);
+ return 5;
+}
+
+void msg_putasz4(unsigned char *out, int sz) {
+ *out = 0x90 | sz;
+}
+
+int msg_putasz16(unsigned char *out, int sz) {
+ if (sz < 32) { msg_putasz4(out, sz); return 1; }
+ out[0] = 0xDC;
+ doput16(out, sz);
+ return 3;
+}
+
+int msg_putasz(unsigned char *out, unsigned int sz) {
+ if (sz < 65536) return msg_putasz16(out, sz);
+ out[0] = 0xDD;
+ doput32(out, sz);
+ return 5;
+}
+
+void msg_putmsz4(unsigned char *out, int sz) {
+ *out = 0x80 | sz;
+}
+
+int msg_putmsz16(unsigned char *out, int sz) {
+ if (sz < 32) { msg_putmsz4(out, sz); return 1; }
+ out[0] = 0xDE;
+ doput16(out, sz);
+ return 3;
+}
+
+int msg_putmsz(unsigned char *out, unsigned int sz) {
+ if (sz < 65536) return msg_putmsz16(out, sz);
+ out[0] = 0xDF;
+ doput32(out, sz);
+ return 5;
+}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/chunklets/msg.h b/src/chunklets/msg.h
new file mode 100644
index 0000000..b85bde3
--- /dev/null
+++ b/src/chunklets/msg.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef INC_CHUNKLETS_MSG_H
+#define INC_CHUNKLETS_MSG_H
+
+#ifdef __cplusplus
+#define _msg_Bool bool
+extern "C" {
+#else
+#define _msg_Bool _Bool
+#endif
+
+/*
+ * Writes a nil (null) message to the buffer out. Always writes a single byte.
+ *
+ * out must point to at least 1 byte.
+ */
+void msg_putnil(unsigned char *out);
+
+/*
+ * Writes the boolean val to the buffer out. Always writes a single byte.
+ *
+ * out must point to at least 1 byte.
+ */
+void msg_putbool(unsigned char *out, _msg_Bool val);
+
+/*
+ * Writes the integer val in the range [-32, 127] to the buffer out. Values
+ * outside this range will produce an undefined encoding. Always writes a single
+ * byte.
+ *
+ * out must point to at least 1 byte.
+ *
+ * It is recommended to use msg_puts() for arbitrary signed values or msg_putu()
+ * for arbitrary unsigned values. Those functions will produce the smallest
+ * possible encoding for any value.
+ */
+void msg_puti7(unsigned char *out, signed char val);
+
+/*
+ * Writes the signed int val in the range [-128, 127] to the buffer out.
+ *
+ * out must point to at least 2 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2}.
+ *
+ * It is recommended to use msg_puts() for arbitrary signed values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_puts8(unsigned char *out, signed char val);
+
+/*
+ * Writes the unsigned int val in the range [0, 255] to the buffer out.
+ *
+ * out must point to at least 2 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2}.
+ *
+ * It is recommended to use msg_putu() for arbitrary unsigned values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_putu8(unsigned char *out, unsigned char val);
+
+/*
+ * Writes the signed int val in the range [-65536, 65535] to the buffer out.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3}.
+ *
+ * It is recommended to use msg_puts() for arbitrary signed values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_puts16(unsigned char *out, short val);
+
+/*
+ * Writes the unsigned int val in the range [0, 65536] to the buffer out.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3}.
+ *
+ * It is recommended to use msg_putu() for arbitrary unsigned values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_putu16(unsigned char *out, unsigned short val);
+
+/*
+ * Writes the signed int val in the range [-2147483648, 2147483647] to the
+ * buffer out.
+ *
+ * out must point to at least 5 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3, 5}.
+ *
+ * It is recommended to use msg_puts() for arbitrary signed values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_puts32(unsigned char *out, int val);
+
+/*
+ * Writes the unsigned int val in the range [0, 4294967295] to the buffer out.
+ *
+ * out must point to at least 5 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3, 5}.
+ *
+ * It is recommended to use msg_putu() for arbitrary unsigned values. That
+ * function will produce the smallest possible encoding for any value.
+ */
+int msg_putu32(unsigned char *out, unsigned int val);
+
+/*
+ * Writes the signed int val in the range [-9223372036854775808,
+ * 9223372036854775807] to the buffer out.
+ *
+ * out must point to at least 9 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3, 5, 9}.
+ */
+int msg_puts(unsigned char *out, long long val);
+
+/*
+ * Writes the unsigned int val in the range [0, 18446744073709551616] to the
+ * buffer out.
+ *
+ * out must point to at least 9 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3, 5, 9}.
+ */
+int msg_putu(unsigned char *out, unsigned long long val);
+
+/*
+ * Writes the IEEE 754 single-precision float val to the buffer out. Always
+ * writes 5 bytes.
+ *
+ * out must point to at least 5 bytes.
+ */
+void msg_putf(unsigned char *out, float val);
+
+/*
+ * Writes the IEEE 754 double-precision float val to the buffer out.
+ *
+ * out must point to at least 9 bytes.
+ *
+ * Returns the number of bytes written, one of {5, 9}.
+ */
+int msg_putd(unsigned char *out, double val);
+
+/*
+ * Writes the string size sz in the range [0, 15] to the buffer out. Values
+ * outside this range will produce an undefined encoding. Always writes a single
+ * byte.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * bytes of the actual string, which must be valid UTF-8.
+ *
+ * out must point to at least 1 byte.
+ *
+ * It is recommended to use msg_putssz() for arbitrary string sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+void msg_putssz5(unsigned char *out, int sz);
+
+/*
+ * Writes the string size sz in the range [0, 255] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * bytes of the actual string, which must be valid UTF-8.
+ *
+ * out must point to at least 2 bytes.
+ *
+ * It is recommended to use msg_putssz() for arbitrary string sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+int msg_putssz8(unsigned char *out, int sz);
+
+/*
+ * Writes the string size sz in the range [0, 65535] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * bytes of the actual string, which must be valid UTF-8.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * It is recommended to use msg_putssz() for arbitrary string sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+int msg_putssz16(unsigned char *out, int sz);
+
+/*
+ * Writes the string size sz in the range [0, 4294967295] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * bytes of the actual string, which must be valid UTF-8.
+ *
+ * out must point to at least 5 bytes.
+ */
+int msg_putssz(unsigned char *out, unsigned int sz);
+
+/*
+ * Writes the binary blob size sz in the range [0, 255] to the buffer out.
+ * Always writes 2 bytes.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N bytes of the actual data.
+ *
+ * out must point to at least 2 bytes.
+ *
+ * It is recommended to use msg_putbsz() for arbitrary binary blob sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+void msg_putbsz8(unsigned char *out, int sz);
+
+/*
+ * Writes the binary blob size sz in the range [0, 65535] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N bytes of the actual data.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3}.
+ *
+ * It is recommended to use msg_putbsz() for arbitrary binary blob sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+int msg_putbsz16(unsigned char *out, int sz);
+
+/*
+ * Writes the binary blob size sz in the range [0, 4294967295] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N bytes of the actual data.
+ *
+ * out must point to at least 5 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 2, 3, 5}.
+ */
+int msg_putbsz(unsigned char *out, unsigned int sz);
+
+/*
+ * Writes the array size sz in the range [0, 15] to the buffer out. Values
+ * outside this range will produce an undefined encoding. Always writes a single
+ * byte.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * other messages, which form the contents of the array.
+ *
+ * out must point to at least 1 byte.
+ *
+ * It is recommended to use msg_putasz() for arbitrary array sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+void msg_putasz4(unsigned char *out, int sz);
+
+/*
+ * Writes the array size sz in the range [0, 65535] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * other messages, which form the contents of the array.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 3}.
+ *
+ * It is recommended to use msg_putasz() for arbitrary array sizes. That
+ * function will produce the smallest possible encoding for any size value.
+ */
+int msg_putasz16(unsigned char *out, int sz);
+
+/*
+ * Writes the array size sz in the range [0, 4294967295] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by N
+ * other messages, which form the contents of the array.
+ *
+ * out must point to at least 5 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 3, 5}.
+ */
+int msg_putasz(unsigned char *out, unsigned int sz);
+
+/*
+ * Writes the map size sz in the range [0, 15] to the buffer out. Values
+ * outside this range will produce an undefined encoding. Always writes a single
+ * byte.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N * 2 other messages, which form the contents of the map as keys followed by
+ * values in alternation.
+ *
+ * out must point to at least 1 byte.
+ *
+ * It is recommended to use msg_putmsz() for arbitrary map sizes. That function
+ * will produce the smallest possible encoding for any size value.
+ */
+void msg_putmsz4(unsigned char *out, int sz);
+
+/*
+ * Writes the array size sz in the range [0, 65536] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N * 2 other messages, which form the contents of the map as keys followed by
+ * values in alternation.
+ *
+ * out must point to at least 3 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 3}.
+ *
+ * It is recommended to use msg_putmsz() for arbitrary map sizes. That function
+ * will produce the smallest possible encoding for any size value.
+ */
+int msg_putmsz16(unsigned char *out, int sz);
+
+/*
+ * Writes the array size sz in the range [0, 4294967295] to the buffer out.
+ *
+ * In a complete message stream, a size of N must be immediately followed by
+ * N * 2 other messages, which form the contents of the map as keys followed by
+ * values in alternation.
+ *
+ * out must point to at least 5 bytes.
+ *
+ * Returns the number of bytes written, one of {1, 3, 5}.
+ */
+int msg_putmsz(unsigned char *out, unsigned int sz);
+
+#ifdef __cplusplus
+}
+#endif
+#undef _msg_Bool
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/crypto.c b/src/crypto.c
index f7ccd78..6d0f2aa 100644
--- a/src/crypto.c
+++ b/src/crypto.c
@@ -3,4 +3,28 @@
#include "3p/monocypher/monocypher.c"
#include "3p/monocypher/monocypher-rng.c"
+// -- SST-specific extensions to 4.0.1 API below --
+void crypto_aead_lock_djb(u8 *cipher_text, u8 mac[16], const u8 key[32],
+ const u8 nonce[8], const u8 *ad, size_t ad_size,
+ const u8 *plain_text, size_t text_size)
+{
+ crypto_aead_ctx ctx;
+ crypto_aead_init_djb(&ctx, key, nonce);
+ crypto_aead_write(&ctx, cipher_text, mac, ad, ad_size,
+ plain_text, text_size);
+ crypto_wipe(&ctx, sizeof(ctx));
+}
+
+int crypto_aead_unlock_djb(u8 *plain_text, const u8 mac[16], const u8 key[32],
+ const u8 nonce[8], const u8 *ad, size_t ad_size,
+ const u8 *cipher_text, size_t text_size)
+{
+ crypto_aead_ctx ctx;
+ crypto_aead_init_djb(&ctx, key, nonce);
+ int mismatch = crypto_aead_read(&ctx, plain_text, mac, ad, ad_size,
+ cipher_text, text_size);
+ crypto_wipe(&ctx, sizeof(ctx));
+ return mismatch;
+}
+
// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/crypto.h b/src/crypto.h
index 44d4fe2..7f0d607 100644
--- a/src/crypto.h
+++ b/src/crypto.h
@@ -6,6 +6,20 @@
#include "3p/monocypher/monocypher.h"
#include "3p/monocypher/monocypher-rng.h"
+// -- SST-specific extensions to 4.0.1 API below --
+void crypto_aead_lock_djb(uint8_t *cipher_text,
+ uint8_t mac [16],
+ const uint8_t key [32],
+ const uint8_t nonce[8],
+ const uint8_t *ad, size_t ad_size,
+ const uint8_t *plain_text, size_t text_size);
+int crypto_aead_unlock_djb(uint8_t *plain_text,
+ const uint8_t mac [16],
+ const uint8_t key [32],
+ const uint8_t nonce[8],
+ const uint8_t *ad, size_t ad_size,
+ const uint8_t *cipher_text, size_t text_size);
+
#endif
// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/democustom.c b/src/democustom.c
index 0ecbaa3..5dcbe01 100644
--- a/src/democustom.c
+++ b/src/democustom.c
@@ -14,9 +14,10 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
+#include <string.h>
+
#include "bitbuf.h"
#include "con_.h"
-#include "democustom.h"
#include "demorec.h"
#include "engineapi.h"
#include "errmsg.h"
@@ -26,6 +27,8 @@
#include "mem.h"
#include "ppmagic.h"
#include "vcall.h"
+#include "x86.h"
+#include "x86util.h"
FEATURE()
REQUIRE(demorec)
@@ -34,72 +37,64 @@ REQUIRE_GAMEDATA(vtidx_RecordPacket)
static int nbits_msgtype, nbits_datalen;
-// The engine allows usermessages up to 255 bytes, we add 2 bytes of overhead,
-// and then there's the leading bits before that too (see create_message)
-static char bb_buf[DEMOCUSTOM_MSG_MAX + 4];
+// engine limit is 255, we use 2 bytes for header + round the bitstream to the
+// next whole byte, which gives 3 bytes overhead hence 252 here.
+#define CHUNKSZ 252
+
+static union {
+ char x[CHUNKSZ + /*7*/ 8]; // needs to be multiple of of 4!
+ bitbuf_cell _align; // just in case...
+} bb_buf;
static struct bitbuf bb = {
- bb_buf, sizeof(bb_buf), sizeof(bb_buf) * 8, 0, false, false, "SST"
+ {bb_buf.x}, sizeof(bb_buf), sizeof(bb_buf) * 8, 0, false, false, "SST"
};
-static void create_message(struct bitbuf *msg, const void *buf, int len) {
- // The way we pack our custom demo data is via a user message packet with
- // type "HudText" - this causes the client to do a text lookup which will
- // simply silently fail on invalid keys. By making the first byte null
- // (creating an empty string), we get the rest of the packet to stick in
- // whatever other data we want.
+static const void *createhdr(struct bitbuf *msg, int len, bool last) {
+ // We pack custom data into user message packets of type "HudText," with a
+ // leading null byte which the engine treats as an empty string. On demo
+ // playback, the client does a text lookup which fails silently on invalid
+ // keys, giving us the rest of the packet to stick in whatever data we want.
//
- // Notes from Uncrafted:
- // > But yeah the data you want to append is as follows:
- // > - 6 bits (5 bits in older versions) for the message type - should be 23
- // > for user message
- bitbuf_appendbits(msg, 23, nbits_msgtype);
- // > - 1 byte for the user message type - should be 2 for HudText
- bitbuf_appendbyte(msg, 2);
- // > - ~~an int~~ 11 or 12 bits for the length of your data in bits,
- bitbuf_appendbits(msg, len * 8, nbits_datalen); // NOTE: assuming len <= 254
- // > - your data
- // [first the aforementioned null byte, plus an arbitrary marker byte to
- // avoid confusion when parsing the demo later...
- bitbuf_appendbyte(msg, 0);
- bitbuf_appendbyte(msg, 0xAC);
- // ... and then just the data itself]
- bitbuf_appendbuf(msg, buf, len);
- // Thanks Uncrafted, very cool!
+ // Big thanks to our resident demo expert, Uncrafted, for explaining what to
+ // do here way back when this was first being figured out!
+ bitbuf_appendbits(msg, 23, nbits_msgtype); // type: 23 is user message
+ bitbuf_appendbyte(msg, 2); // user message type: 2 is HudText
+ bitbuf_appendbits(msg, len * 8, nbits_datalen); // our data length in bits
+ bitbuf_appendbyte(msg, 0); // aforementionied null byte
+ bitbuf_appendbyte(msg, 0xAC + last); // arbitrary marker byte to aid parsing
+ // store the data itself byte-aligned so there's no need to bitshift the
+ // universe (which would be both slower and more annoying to do)
+ bitbuf_roundup(msg);
+ return msg->buf + (msg->nbits >> 3);
}
typedef void (*VCALLCONV WriteMessages_func)(void *this, struct bitbuf *msg);
static WriteMessages_func WriteMessages = 0;
void democustom_write(const void *buf, int len) {
- create_message(&bb, buf, len);
+ for (; len > CHUNKSZ; len -= CHUNKSZ) {
+ createhdr(&bb, CHUNKSZ, false);
+ memcpy(bb.buf + (bb.nbits >> 3), buf, CHUNKSZ);
+ bb.nbits += CHUNKSZ << 3;
+ WriteMessages(demorecorder, &bb);
+ bitbuf_reset(&bb);
+ }
+ createhdr(&bb, len, true);
+ memcpy(bb.buf + (bb.nbits >> 3), buf, len);
+ bb.nbits += len << 3;
WriteMessages(demorecorder, &bb);
bitbuf_reset(&bb);
}
static bool find_WriteMessages(void) {
- // TODO(compat): rewrite this to just scan for a call instruction!
const uchar *insns = (*(uchar ***)demorecorder)[vtidx_RecordPacket];
- // RecordPacket calls WriteMessages pretty much right away:
- // 56 push esi
- // 57 push edi
- // 8B F1 mov esi,ecx
- // 8D BE lea edi,[esi + 0x68c]
- // 8C 06 00 00
- // 57 push edi
- // E8 call CDemoRecorder_WriteMessages
- // B0 EF FF FF
- // So we just double check the byte pattern...
- static const uchar bytes[] =
-#ifdef _WIN32
- HEXBYTES(56, 57, 8B, F1, 8D, BE, 8C, 06, 00, 00, 57, E8);
-#else
-#warning This is possibly different on Linux too, have a look!
- {-1, -1, -1, -1, -1, -1};
-#endif
- if (!memcmp(insns, bytes, sizeof(bytes))) {
- ssize off = mem_loadoffset(insns + sizeof(bytes));
- WriteMessages = (WriteMessages_func)(insns + sizeof(bytes) + 4 + off);
- return true;
+ // RecordPacket calls WriteMessages right away, so just look for a call
+ for (const uchar *p = insns; p - insns < 32;) {
+ if (*p == X86_CALL) {
+ WriteMessages = (WriteMessages_func)(p + 5 + mem_loadoffset(p + 1));
+ return true;
+ }
+ NEXT_INSN(p, "WriteMessages function");
}
return false;
}
@@ -108,23 +103,21 @@ DECL_VFUNC_DYN(int, GetEngineBuildNumber)
INIT {
// More UncraftedkNowledge:
- // > yeah okay so [the usermessage length is] 11 bits if the demo protocol
- // > is 11 or if the game is l4d2 and the network protocol is 2042.
- // > otherwise it's 12 bits
- // > there might be some other l4d2 versions where it's 11 but idk
+ // - usermessage length is:
+ // - 11 bits in protocol 11, or l4d2 protocol 2042
+ // - otherwise 12 bits
// So here we have to figure out the network protocol version!
// NOTE: assuming engclient != null as GEBN index relies on client version
int buildnum = GetEngineBuildNumber(engclient);
- // condition is redundant until other GetEngineBuildNumber offsets are added
- // if (GAMETYPE_MATCHES(L4D2)) {
+ //if (GAMETYPE_MATCHES(L4D2)) { // redundant until we add more GEBN offsets!
nbits_msgtype = 6;
// based on Some Code I Read, buildnum *should* be the protocol version,
// however L4D2 returns the actual game version instead, because sure
// why not. The only practical difference though is that the network
- // protocol froze after 2042, so we just have to do a >=. No big deal
- // really.
+ // protocol froze after 2042, so we just have to do a >=. Fair enough!
+ // TODO(compat): how does TLS affect this? no idea yet
if (buildnum >= 2042) nbits_datalen = 11; else nbits_datalen = 12;
- // }
+ //}
return find_WriteMessages();
}
diff --git a/src/democustom.h b/src/democustom.h
index a0a28b8..7f0c25e 100644
--- a/src/democustom.h
+++ b/src/democustom.h
@@ -17,12 +17,9 @@
#ifndef INC_DEMOCUSTOM_H
#define INC_DEMOCUSTOM_H
-/* maximum length of a custom demo message, in bytes */
-#define DEMOCUSTOM_MSG_MAX 253
-
/*
- * Write a block of up to DEMOWRITER_MSG_MAX bytes into the currently recording
- * demo - NOT bounds checked, caller MUST ensure length is okay!
+ * Writes a custom demo message, automatically splitting into multiple demo
+ * packets if too long. Assumes a demo is currently being recorded.
*/
void democustom_write(const void *buf, int len);
diff --git a/src/demorec.c b/src/demorec.c
index e176ab3..e728ca5 100644
--- a/src/demorec.c
+++ b/src/demorec.c
@@ -50,6 +50,10 @@ bool demorec_forceauto = false;
#define SIGNONSTATE_SPAWN 5
#define SIGNONSTATE_FULL 6
+DEF_PREDICATE(DemoControlAllowed, void)
+DEF_EVENT(DemoRecordStarting, void)
+DEF_EVENT(DemoRecordStopped, int)
+
typedef void (*VCALLCONV SetSignonState_func)(void *, int);
static SetSignonState_func orig_SetSignonState;
static void VCALLCONV hook_SetSignonState(void *this_, int state) {
@@ -83,6 +87,9 @@ static void VCALLCONV hook_StopRecording(void *this) {
*recording = true;
*demonum = lastnum;
}
+ else {
+ EMIT_DemoRecordStopped(lastnum);
+ }
}
DECL_VFUNC_DYN(void, StartRecording)
@@ -90,10 +97,8 @@ DECL_VFUNC_DYN(void, StartRecording)
static struct con_cmd *cmd_record, *cmd_stop;
static con_cmdcb orig_record_cb, orig_stop_cb;
-DEF_PREDICATE(AllowDemoControl, void)
-
static void hook_record_cb(const struct con_cmdargs *args) {
- if (!CHECK_AllowDemoControl()) return;
+ if (!CHECK_DemoControlAllowed()) return;
bool was = *recording;
if (!was && args->argc == 2 || args->argc == 3) {
// safety check: make sure a directory exists, otherwise recording
@@ -152,10 +157,11 @@ static void hook_record_cb(const struct con_cmdargs *args) {
// mike: I think this is questionably necessary but I'm outvoted :)
con_msg("Demo recording started\n");
}
+ EMIT_DemoRecordStarting();
}
static void hook_stop_cb(const struct con_cmdargs *args) {
- if (!CHECK_AllowDemoControl()) return;
+ if (!CHECK_DemoControlAllowed()) return;
wantstop = true;
orig_stop_cb(args);
wantstop = false;
@@ -229,6 +235,7 @@ bool demorec_start(const char *name) {
struct con_cmdargs args = {.argc = 2, .argv = {0, name, 0}};
orig_record_cb(&args);
if (!was && *recording) *demonum = 0; // same logic as in the hook
+ EMIT_DemoRecordStarting();
return *recording;
}
@@ -237,11 +244,12 @@ int demorec_stop(void) {
// making this correct when recording and stopping in the menu lol
int ret = *demonum;
orig_StopRecording(demorecorder);
+ EMIT_DemoRecordStopped(ret);
return ret;
}
-bool demorec_recording(void) {
- return *recording;
+int demorec_demonum(void) {
+ return *recording ? *demonum : -1;
}
INIT {
diff --git a/src/demorec.h b/src/demorec.h
index 2de4f24..b53896f 100644
--- a/src/demorec.h
+++ b/src/demorec.h
@@ -52,21 +52,38 @@ bool demorec_start(const char *name);
/*
* Stops recording the current demo and returns the number of demos recorded
* (the first will have the original basename + .dem extension; the rest will
- * have the _N.dem suffixes).
+ * have the _N.dem suffixes). Value will be zero if the recording is stopped
+ * before the game has even gotten a chance to create the first demo file.
*/
int demorec_stop(void);
/*
- * Queries whether a demo is currently being recorded.
+ * Returns the current number in the recording sequence, or -1 if not recording.
+ * Value may be 0 if recording was requested but has yet to start (say, because
+ * we have yet to join a map).
*/
-bool demorec_recording(void);
+int demorec_demonum(void);
/*
* Used to determine whether to allow usage of the normal record and stop
* commands. Code which takes over control of demo recording can use this to
* block the user from interfering.
*/
-DECL_PREDICATE(AllowDemoControl)
+DECL_PREDICATE(DemoControlAllowed, void)
+
+/*
+ * Emitted whenever a recording session is about to be started, as a result of
+ * either the record command or a call to the demorec_start() function. A demo
+ * file won't actually have been created yet; this merely indicates that a
+ * request to record has happened.
+ */
+DECL_EVENT(DemoRecordStarting, void)
+
+/*
+ * Emitted when the current demo or series of demos has finished recording.
+ * Receives the number of recorded demo files (which could be 0) as an argument.
+ */
+DECL_EVENT(DemoRecordStopped, int)
#endif
diff --git a/src/engineapi.c b/src/engineapi.c
index f4a54d6..24a2d6b 100644
--- a/src/engineapi.c
+++ b/src/engineapi.c
@@ -41,6 +41,7 @@ DECL_VFUNC(void *, GetGlobalVars, 1) // seems to be very stable, thank goodness
void *globalvars;
void *inputsystem, *vgui;
+struct CServerPlugin *pluginhandler;
DECL_VFUNC_DYN(void *, GetAllServerClasses)
@@ -48,6 +49,7 @@ DECL_VFUNC_DYN(void *, GetAllServerClasses)
bool engineapi_init(int pluginver) {
if (!con_detect(pluginver)) return false;
+ pluginhandler = factory_engine("ISERVERPLUGINHELPERS001", 0);
if (engclient = factory_engine("VEngineClient015", 0)) {
_gametype_tag |= _gametype_tag_Client015;
diff --git a/src/engineapi.h b/src/engineapi.h
index d740c8c..fbc062b 100644
--- a/src/engineapi.h
+++ b/src/engineapi.h
@@ -136,6 +136,23 @@ extern struct VEngineServer *engserver;
extern void *globalvars;
extern void *inputsystem, *vgui;
+// XXX: not exactly engine *API* but not curently clear where else to put this
+struct CPlugin {
+ char description[128];
+ bool paused;
+ void *theplugin; // our own "this" pointer (or whichever other plugin it is)
+ int ifacever;
+ // should be the plugin library, but in old Source branches it's just null,
+ // because CServerPlugin::Load() erroneously shadows this field with a local
+ void *module;
+};
+struct CServerPlugin /* : IServerPluginHelpers */ {
+ void **vtable;
+ struct CUtlVector plugins;
+ /*IPluginHelpersCheck*/ void *pluginhlpchk;
+};
+extern struct CServerPlugin *pluginhandler;
+
/*
* Called on plugin init to attempt to initialise various core interfaces.
* This includes console/cvar initialisation and populating gametype and
diff --git a/src/os-win32.h b/src/os-win32.h
index a006083..db20964 100644
--- a/src/os-win32.h
+++ b/src/os-win32.h
@@ -50,10 +50,10 @@ typedef unsigned short os_char;
#define OS_MAIN wmain
-static inline void *os_dlopen(const ushort *name) {
+static inline void *os_dlopen(const unsigned short *name) {
return LoadLibraryW(name);
}
-static inline void *os_dlhandle(const ushort *name) {
+static inline void *os_dlhandle(const unsigned short *name) {
return GetModuleHandleW(name);
}
static inline void *os_dlsym(void *m, const char *s) {
diff --git a/src/rinput.c b/src/rinput.c
index 6b6d4d7..79b6661 100644
--- a/src/rinput.c
+++ b/src/rinput.c
@@ -64,7 +64,7 @@ DEF_CVAR_UNREG(m_rawinput, "Use Raw Input for mouse input (SST reimplementation)
DEF_CVAR_MINMAX(sst_mouse_factor, "Number of hardware mouse counts per step",
1, 1, 20, /*CON_ARCHIVE |*/ CON_HIDDEN)
-static ssize __stdcall inproc(void *wnd, uint msg, ssize wp, ssize lp) {
+static ssize __stdcall inproc(void *wnd, uint msg, usize wp, ssize lp) {
switch (msg) {
case WM_INPUT:;
char buf[sizeof(RAWINPUTHEADER) + sizeof(RAWMOUSE) /* = 40 */];
diff --git a/src/sst.c b/src/sst.c
index 61a021f..9fd1108 100644
--- a/src/sst.c
+++ b/src/sst.c
@@ -30,6 +30,7 @@
#include "gametype.h"
#include "hook.h"
#include "os.h"
+#include "sst.h"
#include "vcall.h"
#include "version.h"
@@ -46,10 +47,13 @@ static int ifacever;
void *clientlib = 0;
bool sst_earlyloaded = false; // see deferinit() below
+bool sst_userunloaded = false; // see hook_plugin_unload_cb() below
+
+#define VDFBASENAME "SourceSpeedrunTools"
#ifdef _WIN32
extern long __ImageBase; // this is actually the PE header struct but don't care
-#define ownhandle() ((void *)&__ImageBase)
+static inline void *ownhandle(void) { return &__ImageBase; }
#else
// sigh, _GNU_SOURCE crap. define here instead >:(
typedef struct {
@@ -59,16 +63,16 @@ typedef struct {
void *dli_saddr;
} Dl_info;
int dladdr1(const void *addr, Dl_info *info, void **extra_info, int flags);
-static inline void *ownhandle(void) {
+static void *ownhandle(void) {
+ static void *cached = 0;
Dl_info dontcare;
- void *dl;
- dladdr1((void *)&ownhandle, &dontcare, &dl, /*RTLD_DL_LINKMAP*/ 2);
- return dl;
+ if (!cached) {
+ dladdr1((void *)&ownhandle, &dontcare, &cached, /*RTLD_DL_LINKMAP*/ 2);
+ }
+ return cached;
}
#endif
-#define VDFBASENAME "SourceSpeedrunTools"
-
#ifdef _WIN32
// not a proper check, just a short-circuit check to avoid doing more work.
static inline bool checksamedrive(const ushort *restrict path1,
@@ -305,6 +309,45 @@ e: con_warn("!!! SOME FEATURES MAY BE BROKEN !!!\n");
return false;
}
+DEF_PREDICATE(AllowPluginLoading, bool)
+DEF_EVENT(PluginLoaded, void)
+DEF_EVENT(PluginUnloaded, void)
+
+static struct con_cmd *cmd_plugin_load, *cmd_plugin_unload;
+static con_cmdcb orig_plugin_load_cb, orig_plugin_unload_cb;
+
+static int ownidx; // XXX: super hacky way of getting this to do_unload()
+
+static void hook_plugin_load_cb(const struct con_cmdargs *args) {
+ if (args->argc == 1) return;
+ if (!CHECK_AllowPluginLoading(true)) return;
+ orig_plugin_load_cb(args);
+ EMIT_PluginLoaded();
+}
+static void hook_plugin_unload_cb(const struct con_cmdargs *args) {
+ if (args->argc == 1) return;
+ if (!CHECK_AllowPluginLoading(false)) return;
+ int idx = atoi(args->argv[1]);
+ struct CPlugin **plugins = pluginhandler->plugins.m.mem;
+ if (idx >= 0 && idx < pluginhandler->plugins.sz &&
+ plugins[idx]->theplugin == &plugin_obj) {
+ sst_userunloaded = true;
+ ownidx = idx;
+#ifdef __clang__
+ // thanks clang for forcing use of return here and THEN warning about it
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpedantic"
+ __attribute__((musttail)) return orig_plugin_unload_cb(args);
+#pragma clang diagnostic pop
+#else
+#error We are tied to clang without an assembly solution for this!
+#endif
+ }
+ // if it's some other plugin being unloaded, we can keep doing stuff after
+ orig_plugin_unload_cb(args);
+ EMIT_PluginUnloaded();
+}
+
static bool do_load(ifacefactory enginef, ifacefactory serverf) {
if (!hook_init()) {
errmsg_warnsys("couldn't set up memory for function hooking");
@@ -326,46 +369,34 @@ static bool do_load(ifacefactory enginef, ifacefactory serverf) {
*p++ = (void *)&nop_p_v; // OnEdictAllocated
*p = (void *)&nop_p_v; // OnEdictFreed
if (!deferinit()) { do_featureinit(); fixes_apply(); }
+ if (pluginhandler) {
+ cmd_plugin_load = con_findcmd("plugin_load");
+ orig_plugin_load_cb = cmd_plugin_load->cb;
+ cmd_plugin_load->cb = &hook_plugin_load_cb;
+ cmd_plugin_unload = con_findcmd("plugin_unload");
+ orig_plugin_unload_cb = cmd_plugin_unload->cb;
+ cmd_plugin_unload->cb = &hook_plugin_unload_cb;
+ }
return true;
}
-struct CServerPlugin /* : IServerPluginHelpers */ {
- void **vtable;
- struct CUtlVector plugins;
- /*IPluginHelpersCheck*/ void *pluginhlpchk;
-};
-struct CPlugin {
- char description[128];
- bool paused;
- void *theplugin; // our own "this" pointer (or whichever other plugin it is)
- int ifacever;
- // should be the plugin library, but in old Source branches it's just null,
- // because CServerPlugin::Load() erroneously shadows this field with a local
- void *module;
-};
-
static void do_unload(void) {
#ifdef _WIN32 // this is only relevant in builds that predate linux support
- struct CServerPlugin *pluginhandler =
- factory_engine("ISERVERPLUGINHELPERS001", 0);
- if (pluginhandler) { // if not, oh well too bad we tried :^)
- struct CPlugin **plugins = pluginhandler->plugins.m.mem;
- int n = pluginhandler->plugins.sz;
- for (struct CPlugin **pp = plugins; pp - plugins < n; ++pp) {
- if ((*pp)->theplugin == (void *)&plugin_obj) {
- // see comment in CPlugin above. setting this to the real handle
- // right before the engine tries to unload us allows it to
- // actually do so. in newer branches this is redundant but
- // doesn't do any harm so it's just unconditional.
- // NOTE: old engines ALSO just leak the handle and never call
- // Unload() if Load() fails; can't really do anything about that
- (*pp)->module = ownhandle();
- break;
- }
+ if (pluginhandler) { // if not, oh well too bad :^)
+ cmd_plugin_load->cb = orig_plugin_load_cb;
+ cmd_plugin_unload->cb = orig_plugin_unload_cb;
+ if (sst_userunloaded) {
+ struct CPlugin **plugins = pluginhandler->plugins.m.mem;
+ // see comment in CPlugin above. setting this to the real handle
+ // right before the engine tries to unload us allows it to actually
+ // do so. in newer branches this is redundant but doesn't do any
+ // harm so it's just unconditional. NOTE: old engines ALSO just leak
+ // the handle and never call Unload() if Load() fails; can't really
+ // do anything about that.
+ plugins[ownidx]->module = ownhandle();
}
}
#endif
-
endfeatures();
#ifdef __linux__
if (clientlib) dlclose(clientlib);
diff --git a/src/sst.h b/src/sst.h
index 8c10798..f7e4bb4 100644
--- a/src/sst.h
+++ b/src/sst.h
@@ -24,10 +24,17 @@
DECL_EVENT(ClientActive, struct edict */*player*/)
DECL_EVENT(Tick, bool /*simulating*/)
+DECL_PREDICATE(AllowPluginLoading, void)
+DECL_EVENT(PluginLoaded, void)
+DECL_EVENT(PluginUnloaded, void)
+
extern void *clientlib;
/* occasionally useful: quick query to determine how sst was loaded */
extern bool sst_earlyloaded;
+/* similar query for how we are being unloaded - ONLY valid during unload */
+// TODO(opt): we can skip a whole bunch of cleanup when exiting the game!
+extern bool sst_userunloaded;
#endif
diff --git a/test/bitbuf.test.c b/test/bitbuf.test.c
index 324a7f6..554d3a2 100644
--- a/test/bitbuf.test.c
+++ b/test/bitbuf.test.c
@@ -15,15 +15,11 @@ static union {
static struct bitbuf bb = {bb_buf.buf, 512, 512 * 8, 0, false, false, "test"};
TEST("The possible UB in bitbuf_appendbuf shouldn't trigger horrible bugs") {
- char unalign[3] = {'X', 'X', 'X'};
- char _buf[32 + sizeof(bitbuf_cell)];
- char *buf = _buf;
- if (bitbuf_align <= 1) {
- // *shouldn't* happen
+ if (bitbuf_align <= 1) { // *shouldn't* happen
fputs("what's going on with the alignment???\n", stderr);
return false;
}
- // make sure the pointer is definitely misaligned
+ char _buf[32 + _Alignof(bitbuf_cell)], *buf = _buf;
while (!((usize)buf % bitbuf_align)) ++buf;
memcpy(buf, "Misaligned test buffer contents!", 32);
@@ -31,4 +27,18 @@ TEST("The possible UB in bitbuf_appendbuf shouldn't trigger horrible bugs") {
return !memcmp(bb.buf, buf, 32);
}
+TEST("Aligning to the next byte should work as intended") {
+ for (int i = 0; i < 65535; i += 8) {
+ bb.curbit = i;
+ bitbuf_roundup(&bb);
+ if (bb.curbit != i) return false; // don't round if already rounded
+ for (int j = i + 1; j < i + 8; ++j) {
+ bb.curbit = j;
+ bitbuf_roundup(&bb);
+ if (bb.curbit != i + 8) return false;
+ }
+ }
+ return true;
+}
+
// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/tools/genkeypair.c b/tools/genkeypair.c
new file mode 100644
index 0000000..7db856b
--- /dev/null
+++ b/tools/genkeypair.c
@@ -0,0 +1,42 @@
+// *SUPER* low effort x25519 keygen tool, for lack of better tool on hand.
+// To compile:
+// Unix: $CC -O2 -Dbool=_Bool -o.build/genkeypair tools/genkeypair.c
+// Windows: clang-cl -fuse-ld=lld -O2 -Dbool=_Bool -Fe.build/genkeypair.exe tools/genkeypair.c /link advapi32.lib
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../src/3p/monocypher/monocypher.c" // yeah, we're doing this.
+#include "../src/os.h"
+
+int main(void) {
+ unsigned char prv[32], pub[32];
+ os_randombytes(prv, sizeof(prv));
+ crypto_x25519_public_key(pub, prv);
+ // and now the worst string formatting code you've ever seen!!!
+ fputs("Private key: ", stdout);
+ for (int i = 0; i < sizeof(prv); i += 4) {
+ fprintf(stdout, "%.2X%.2X%.2X%.2X",
+ prv[i], prv[i + 1], prv[i + 2], prv[i + 3]);
+ }
+ fputs("\n{", stdout);
+ for (int i = 0; i < sizeof(prv) - 4; i += 4) {
+ fprintf(stdout, "0x%.2X, 0x%.2X, 0x%.2X, 0x%.2X, ",
+ prv[i], prv[i + 1], prv[i + 2], prv[i + 3]);
+ }
+ fprintf(stdout, "0x%.2X, 0x%.2X, 0x%.2X, 0x%.2X}\n\nPublic key: ",
+ prv[sizeof(prv) - 4], prv[sizeof(prv) - 3],
+ prv[sizeof(prv) - 2], prv[sizeof(prv) - 1]);
+ for (int i = 0; i < sizeof(pub); i += 4) {
+ fprintf(stdout, "%.2X%.2X%.2X%.2X",
+ pub[i], pub[i + 1], pub[i + 2], pub[i + 3]);
+ }
+ fputs("\n{", stdout);
+ for (int i = 0; i < sizeof(pub) - 4; i += 4) {
+ fprintf(stdout, "0x%.2X, 0x%.2X, 0x%.2X, 0x%.2X, ",
+ pub[i], pub[i + 1], pub[i + 2], pub[i + 3]);
+ }
+ fprintf(stdout, "0x%.2X, 0x%.2X, 0x%.2X, 0x%.2X}\n",
+ pub[sizeof(pub) - 4], pub[sizeof(pub) - 3],
+ pub[sizeof(pub) - 2], pub[sizeof(pub) - 1]);
+}