summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorMichael Smith <mikesmiffy128@gmail.com>2024-02-26 18:54:16 +0000
committerMichael Smith <mikesmiffy128@gmail.com>2024-02-26 18:54:16 +0000
commitefd2491a3d19160d9cae2a094e7baa9f2d958196 (patch)
tree1cbcae136679bc1411c3758c1252ffdc67d7d12b
Implement initial versionv1.0
-rw-r--r--.gitignore5
-rw-r--r--LICENCE18
-rw-r--r--README48
-rw-r--r--TODO.txt3
-rw-r--r--compile.bat13
-rw-r--r--dist/LICENCE-threadfix19
-rw-r--r--src/exe.rc33
-rw-r--r--src/fakeiat.h45
-rw-r--r--src/injected.c77
-rw-r--r--src/intdefs.h25
-rw-r--r--src/version.h3
-rw-r--r--src/wincrt.c37
-rw-r--r--src/wrap.c145
-rw-r--r--src/x86.c96
-rw-r--r--src/x86.h565
-rw-r--r--tools/mkbindist.bat28
16 files changed, 1160 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..62b9d63
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/*.wrap.exe
+/junk/
+/compile_commands.json
+/compile_flags.txt
+/release/
diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..8fe70da
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,18 @@
+Except where otherwise noted, the following terms apply:
+════════════════════════════════════════════════════════════════════════════════
+Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+════════════════════════════════════════════════════════════════════════════════
+Parts of this software distribution are released into the public domain. Check
+the copyright notices in individual files for full details.
diff --git a/README b/README
new file mode 100644
index 0000000..d01020e
--- /dev/null
+++ b/README
@@ -0,0 +1,48 @@
+══════ Source Thread Fix ══════
+
+This builds a wrapper executable to fix a bug present in old versions of various
+Source games wherein having too many CPU threads available will cause a crash.
+
+It supports Windows only; I don’t know whether this is also an issue on any
+Linux game build but I’ll cross that bridge if anyone ever reports a problem.
+This tool was originally created because some Left 4 Dead 2 runners have
+high-end Ryzens and such with very high core counts, and were essentially unable
+to play without a fix.
+
+NOTE: Please read and understand LICENCE before redistributing this software!
+
+════ Compiling ════
+
+• Install the Windows 10 SDK and MSVC toolset via the Visual Studio Installer
+ (if you don’t care what else gets installed, installing the C++ Desktop
+ workload should be fine).
+• Install native Clang from https://clang.llvm.org (NOT MinGW/MSYS2 Clang!).
+• Run compile.bat.
+
+You might be able to use MSVC instead of Clang for this but I didn’t bother
+trying to find out.
+
+════ Usage ════
+
+The default build output is hl2.wrap.exe, which will wrap a neighbouring
+hl2.exe. In the likely event that your game binary has a different name, just
+match that; for example call it left4dead2.wrap.exe to wrap Left 4 Dead 2.
+
+Then, in whatever script or shortcut you use to invoke your game binary, simply
+invoke the wrapper instead. It will automatically find the original binary
+beside itself and start it up with the necessary hook in place to fix the bug.
+
+IMPORTANT: The wrapper forcibly passes -insecure to every game it wraps, to
+reduce the risk of a VAC ban. Even then, do NOT join any servers that were not
+also started in insecure mode, or (particularly with older game versions) you
+might still be at risk of getting banned even with -insecure set on your end.
+YOU HAVE BEEN WARNED!
+
+Lastly, note that if you don’t have more than 24 threads, this thing won’t
+exactly do anything useful, so if you’re bundling this into an old version of
+a game for speedrunning or other purposes, consider having the launch script
+first check the core count and bypass this if it’s unnecessary. It doesn’t
+really matter a huge deal, but you might as well.
+
+Thanks, and have fun!
+- Michael Smith <mikesmiffy128@gmail.com>
diff --git a/TODO.txt b/TODO.txt
new file mode 100644
index 0000000..62e0471
--- /dev/null
+++ b/TODO.txt
@@ -0,0 +1,3 @@
+* -DWIN32_LEAN_AND_MEAN -DNOMINMAX -DWINCRT_NO_FLTUSED, remove #defines
+* move into sst, reuse intdefs and x86, add #ifndef WINCRT_NO_FLTUSED to wincrt.c
+* figure out distribution???
diff --git a/compile.bat b/compile.bat
new file mode 100644
index 0000000..3194e14
--- /dev/null
+++ b/compile.bat
@@ -0,0 +1,13 @@
+:: This file is dedicated to the public domain.
+@echo off
+
+llvm-rc /nologo /r /fo tmp.res src/exe.rc
+clang-cl -m32 -mno-sse -fuse-ld=lld -flto -O1 -GR- -GS- -Gs9999999 -EHa- -Oi ^
+-Fehl2.wrap.exe -W3 -Wpedantic -DWIN32_LEAN_AND_MEAN -DNOMINMAX ^
+src/injected.c src/wincrt.c src/wrap.c src/x86.c ^
+-link -nodefaultlib -subsystem:windows,6.0 -stack:0x10000,0x10000 -fixed:no ^
+-dynamicbase -Brepro ^
+kernel32.lib user32.lib tmp.res
+del tmp.res & exit /b %errorlevel%
+
+:: vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/dist/LICENCE-threadfix b/dist/LICENCE-threadfix
new file mode 100644
index 0000000..0de6541
--- /dev/null
+++ b/dist/LICENCE-threadfix
@@ -0,0 +1,19 @@
+Source Thread Fix is released under the following copyright licence:
+════════════════════════════════════════════════════════════════════════════════
+Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+════════════════════════════════════════════════════════════════════════════════
+Please respect these terms when distributing copies of the game wrapper
+executable — doing so is as simple as keeping this LICENCE-threadfix file in
+place. Thanks, and have fun! :^)
diff --git a/src/exe.rc b/src/exe.rc
new file mode 100644
index 0000000..e826184
--- /dev/null
+++ b/src/exe.rc
@@ -0,0 +1,33 @@
+/* This file is dedicated to the public domain. */
+
+#include "version.h"
+
+#define EN_GB 0x809
+
+1 VERSIONINFO
+FILEVERSION VERSION_MAJOR,VERSION_MINOR,0,0
+PRODUCTVERSION VERSION_MAJOR,VERSION_MINOR,0,0
+FILEFLAGSMASK 0x17L
+FILEFLAGS 0
+FILEOS 4
+
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "080904b0"
+ BEGIN
+ VALUE "FileDescription", "High thread count fix wrapper for Source"
+ VALUE "FileVersion", VERSION
+ VALUE "InternalName", "Source Thread Fix"
+ VALUE "LegalCopyright", "Michael Smith 2024; ISC licence"
+ VALUE "ProductName", "Source Thread Fix"
+ VALUE "ProductVersion", VERSION
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", EN_GB, 1200
+ END
+END
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/fakeiat.h b/src/fakeiat.h
new file mode 100644
index 0000000..3e51381
--- /dev/null
+++ b/src/fakeiat.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef INC_FAKEIAT_H
+#define INC_FAKEIAT_H
+
+// Forward-declared so the GetSystemInfo pointer type below can be named without
+// the full structure definition being visible here.
+struct _SYSTEM_INFO;
+
+// Function pointer types for the three entries carried in the fake IAT, spelled
+// with plain C types instead of the Windows typedefs.
+typedef int (*__stdcall _iat_FlushInstructionCache_func)(void *, const void *,
+ unsigned long);
+typedef void (*__stdcall _iat_GetSystemInfo_func)(struct _SYSTEM_INFO *);
+typedef int (*__stdcall _iat_VirtualProtect_func)(void *, unsigned long,
+ unsigned long, unsigned long *);
+
+// Because this is one EXE (not a DLL), injecting it with LoadLibrary doesn't
+// fill out the IAT properly, causing crashes when API functions are used. We
+// _could_ just manually populate/fix up the IAT, but that's kind of a pain in
+// the arse. Instead, we use this poor-man's IAT to pass down literally three
+// functions that are used inside of the child process' address space.
+// Only declared here: exactly one source file has to provide the definition of
+// IAT.
+extern struct _fakeiat {
+ _iat_FlushInstructionCache_func FlushInstructionCache;
+ _iat_GetSystemInfo_func GetSystemInfo;
+ _iat_VirtualProtect_func VirtualProtect;
+} IAT;
+
+// Opt-in redirection: a source file that defines FAKEIAT_DEFINES before
+// including this header has its uses of these three API names go through the
+// IAT table instead of the regular imports.
+#ifdef FAKEIAT_DEFINES
+#define FlushInstructionCache (IAT.FlushInstructionCache)
+#define GetSystemInfo (IAT.GetSystemInfo)
+#define VirtualProtect (IAT.VirtualProtect)
+#endif
+
+#endif
diff --git a/src/injected.c b/src/injected.c
new file mode 100644
index 0000000..bd69474
--- /dev/null
+++ b/src/injected.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <Windows.h>
+
+#define FAKEIAT_DEFINES
+#include "fakeiat.h"
+#include "intdefs.h"
+#include "x86.h"
+
+// The stuff in this file gets called in the address space of the child process!
+// Because we're an EXE, our IAT doesn't get filled properly, so in order to
+// call kernel32 functions without crashing, make sure they're in the fake IAT.
+// See fakeiat.h and wrap.c.
+
+__declspec(align(4096))
+static uchar trampoline[4096]; // has to be a whole page, obviously
+
+// simplified version of the usual hook.c code since there's only a single
+// function to hook and no need to unhook
+//
+// Redirects func_ to target by overwriting its first 5 bytes with a JMP rel32.
+// The instructions displaced by that patch are first copied into `trampoline`
+// and followed by a JMP back to the rest of the function, so calling
+// `trampoline` runs the original, unhooked code.
+// Returns true on success; false if a protection change fails or if the start
+// of the function contains something this code refuses to relocate.
+// The protection values returned in `old` are never restored.
+static bool hook(void *func_, void *target) {
+ ulong old;
+ // the trampoline is ordinary static data, so it has to be made executable
+ if (!VirtualProtect(trampoline, sizeof(trampoline),
+ PAGE_EXECUTE_READWRITE, &old)) {
+ return false;
+ }
+ uchar *func = func_;
+ // if the function begins with JMP rel32 stub(s), follow them to the real body
+ while (*func == X86_JMPIW) func += *(int *)(func + 1) + 5;
+ if (!VirtualProtect(func, 5, PAGE_EXECUTE_READWRITE, &old)) return false;
+ // count whole instructions until at least the 5 bytes we overwrite are covered
+ int len = 0;
+ for (;;) {
+ // a CALL rel32 would point somewhere else once copied, so give up on it
+ if (func[len] == X86_CALL) return false;
+ int ilen = x86_len(func + len);
+ if (ilen == -1) return false; // instruction not understood by x86_len
+ len += ilen;
+ if (len >= 5) break;
+ // likewise refuse a JMP rel32 in the part that would be relocated
+ // NOTE(review): other relative-operand opcodes (e.g. short jumps) are not
+ // rejected here and would be copied verbatim - confirm that's acceptable
+ if (func[len] == X86_JMPIW) return false;
+ }
+ memcpy(trampoline, func, len);
+ trampoline[len] = X86_JMPIW;
+ // rel32 is measured from the end of the JMP (trampoline + len + 5) and the
+ // destination is func + len, so the two len terms cancel out
+ uint diff = func - (trampoline + 5); // goto the continuation
+ memcpy(trampoline + len + 1, &diff, 4);
+ diff = (uchar *)target - (func + 5); // goto the hook target
+ func[0] = X86_JMPIW;
+ memcpy(func + 1, &diff, 4);
+ // -1 is the current process, and it's a constant in the WDK, so it's
+ // assumed we can safely avoid the useless GetCurrentProcess call
+ FlushInstructionCache((void *)-1, func, 5);
+ return true;
+}
+
+// Type of the real GetSystemInfo(); orig_GetSystemInfo calls it by way of the
+// trampoline that hook() fills in.
+typedef void (*__stdcall GetSystemInfo_func)(SYSTEM_INFO *info);
+#define orig_GetSystemInfo ((GetSystemInfo_func)trampoline)
+
+// Replacement for GetSystemInfo(): lets the real function fill in *info, then
+// clamps the reported processor count.
+static void __stdcall hook_GetSystemInfo(SYSTEM_INFO *info) {
+	// NOTE: the actual limit is a bit higher than this, but there's probably
+	// not much reason to go over this either.
+	enum { MAX_REPORTED_CPUS = 24 };
+
+	orig_GetSystemInfo(info);
+	// Here's where the magic happens!
+	if (info->dwNumberOfProcessors <= MAX_REPORTED_CPUS) return;
+	info->dwNumberOfProcessors = MAX_REPORTED_CPUS;
+}
+
+// Installs the GetSystemInfo hook and returns the result of hook() as an int:
+// nonzero on success, 0 on failure. The parameter is ignored.
+// Because this file defines FAKEIAT_DEFINES, GetSystemInfo below expands to
+// IAT.GetSystemInfo, i.e. the address stored in the fake IAT.
+__declspec(noinline) int __stdcall injectedentry(int unused) {
+ return hook((void *)GetSystemInfo, (void *)&hook_GetSystemInfo);
+}
diff --git a/src/intdefs.h b/src/intdefs.h
new file mode 100644
index 0000000..97c6f82
--- /dev/null
+++ b/src/intdefs.h
@@ -0,0 +1,25 @@
+/* This file is dedicated to the public domain. */
+
+#ifndef INC_INTDEFS_H
+#define INC_INTDEFS_H
+
+typedef signed char schar;
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef long long vlong;
+typedef unsigned long long uvlong;
+
+typedef schar s8;
+typedef uchar u8;
+typedef short s16;
+typedef ushort u16;
+typedef int s32;
+typedef uint u32;
+typedef vlong s64;
+typedef uvlong u64;
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 0000000..89d1553
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,3 @@
+#define VERSION "1.0"
+#define VERSION_MAJOR 1
+#define VERSION_MINOR 0
diff --git a/src/wincrt.c b/src/wincrt.c
new file mode 100644
index 0000000..ae24045
--- /dev/null
+++ b/src/wincrt.c
@@ -0,0 +1,37 @@
+/* This file is dedicated to the public domain. */
+
+// TODO(opt): this feels like a sad implementation, can we do marginally better?
+//
+// Freestanding memcmp: compares the first sz bytes of x_ and y_ and returns a
+// negative, zero or positive value according to the first differing byte.
+// Bytes are compared as unsigned char, as the C standard requires; comparing
+// through plain (signed) char would order bytes >= 0x80 before bytes < 0x80.
+int memcmp(const void *x_, const void *y_, unsigned int sz) {
+	const unsigned char *x = x_, *y = y_;
+	for (unsigned int i = 0; i < sz; ++i) {
+		if (x[i] != y[i]) return x[i] < y[i] ? -1 : 1;
+	}
+	return 0;
+}
+
+// Freestanding memcpy: copies sz bytes from y to x (the regions must not
+// overlap) and returns the original destination pointer.
+void *memcpy(void *restrict x, const void *restrict y, unsigned int sz) {
+	// rep movsb leaves EDI/ESI/ECX advanced, and the asm below writes those
+	// values back into x, y and sz - so the destination has to be saved first
+	// in order to return it, rather than returning a pointer past the copy
+	void *const ret = x;
+#ifdef __clang__
+	__asm__ volatile (
+		"rep movsb\n" :
+		"=D" (x), "=S" (y), "=c" (sz) :
+		"0" (x), "1" (y), "2" (sz) :
+		"memory"
+	);
+#else // terrible fallback just in case someone wants to use this with MSVC
+	char *restrict xb = x; const char *restrict yb = y;
+	for (unsigned int i = 0; i < sz; ++i) xb[i] = yb[i];
+#endif
+	return ret;
+}
+
+// this was briefly needed at some point in debugging but seems to be gone again
+// (hence crappy impl). if compiler starts calling memset with opts on, we
+// should use a proper rep stosb impl as well
+//void *memset(void *x, int c, unsigned int n) {
+// char *xb = x;
+// for (; n; ++xb, --n) *xb = c;
+// return x;
+//}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/wrap.c b/src/wrap.c
new file mode 100644
index 0000000..f856cb2
--- /dev/null
+++ b/src/wrap.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <Windows.h>
+
+#include "fakeiat.h"
+#include "intdefs.h"
+
+extern struct HINSTANCE__ __ImageBase;
+
+struct _fakeiat IAT;
+
+// Returns the number of 16-bit units in s before its terminating zero.
+static int len(const ushort *s) {
+	const ushort *end = s;
+	while (*end) ++end;
+	return end - s;
+}
+
+// Shows `message` in a message box titled "Thread fix wrapper error", then ends
+// the wrapper process with exit code `status`. Never returns.
+static _Noreturn void die(int status, const ushort *message) {
+ MessageBoxW(0, message, L"Thread fix wrapper error", 0);
+ ExitProcess(status);
+}
+
+__declspec(noinline) int __stdcall injectedentry(int unused); // injected.c
+
+// Runs rfunc(rparam) on a new thread inside the process `proc`, waits for it to
+// finish and returns the thread's exit code converted to a pointer.
+// If the thread can't be created, `proc` is terminated and the wrapper dies
+// with `errstr`. If waiting for the thread or reading its exit code fails, 0 is
+// returned, which callers already treat as failure.
+static void *rpc(void *proc, void *rfunc, void *rparam, const ushort *errstr) {
+	void *rthread = CreateRemoteThread(proc, 0, 32768,
+			(LPTHREAD_START_ROUTINE)rfunc, rparam, 0, 0);
+	if (!rthread) {
+		TerminateProcess(proc, -1);
+		die(100, errstr);
+	}
+	ulong code = 0;
+	if (WaitForSingleObject(rthread, INFINITE) != WAIT_OBJECT_0 ||
+			!GetExitCodeThread(rthread, &code)) {
+		code = 0; // never hand back an indeterminate value
+	}
+	CloseHandle(rthread); // the handle was previously leaked
+	return (void *)(ULONG_PTR)code;
+}
+
+// Terminates the (still suspended) child process and then reports a fatal
+// error, so that a failed injection never leaves an orphaned game process.
+static _Noreturn void killdie(void *proc, const ushort *message) {
+	TerminateProcess(proc, -1);
+	die(100, message);
+}
+
+// main EXE entry point. this seems not to get called when we're LoadLibrary'd!
+//
+// For a wrapper named <name>.wrap.exe: starts <name>.exe suspended with
+// "-steam -insecure" followed by our own arguments, loads this same EXE image
+// into it, runs injectedentry() there to hook GetSystemInfo, then resumes the
+// game, waits for it and exits with its exit code.
+// Exit codes on failure: 1 for command line problems, 2 for wrapper naming
+// problems, 100 for anything that goes wrong with the child process.
+_Noreturn void __stdcall WinMainCRTStartup(void) {
+	ushort name[MAX_PATH], origname[MAX_PATH];
+	// big enough for the longest command line the length check below allows
+	// (CreateProcessW accepts up to 32767 characters including the terminator)
+	ushort cmdline[32768];
+	ushort *myargs = GetCommandLineW();
+	bool quote = false, oddslash = false;
+	// skip over our own program name, honouring quotes and backslash escapes;
+	// reaching the end of the string just means no arguments were given
+	for (; *myargs; ++myargs) {
+		if (*myargs == '\\') {
+			oddslash = !oddslash;
+		}
+		else {
+			if (*myargs == '"') { if (!oddslash) quote = !quote; }
+			else if ((*myargs == ' ' || *myargs == '\t') && !quote) break;
+			oddslash = false;
+		}
+	}
+	while (*myargs == ' ' || *myargs == '\t') ++myargs;
+	if (len(myargs) > 32767 - MAX_PATH - sizeof("\"\"-steam -insecure ") - 1) {
+		die(1, L"Command line is too long");
+	}
+	int namelen = GetModuleFileNameW(0, name, MAX_PATH);
+	// a return value of MAX_PATH means the path was truncated
+	if (namelen >= MAX_PATH) die(2, L"Wrapper path is too long");
+	// the suffix is 9 wide characters, so the byte count must be scaled
+	if (namelen < sizeof("x.wrap.exe") - 1 ||
+			memcmp(name + namelen - 9, L".wrap.exe", 9 * sizeof(*name))) {
+		die(2, L"Wrapper name must end in .wrap.exe");
+	}
+	cmdline[0] = L'"';
+	int i = 0;
+	for (; i < namelen - 9; ++i) {
+		origname[i] = name[i];
+		cmdline[i + 1] = name[i]; // XXX: assuming no quotes etc. prolly fine?
+	}
+	// 5 units: ".exe" plus the terminator, since origname is uninitialised
+	memcpy(origname + i, L".exe", 5 * sizeof(*origname));
+	// 23 units, deliberately without the terminator: the arguments follow
+	memcpy(cmdline + i + 1, L".exe\" -steam -insecure ", 23 * sizeof(*cmdline));
+	const ushort *p = myargs; ushort *q = cmdline + i + 24;
+	do *q++ = *p; while (*p++);
+	PROCESS_INFORMATION info;
+	STARTUPINFOW startinfo = {.cb = sizeof(startinfo)};
+	if (!CreateProcessW(origname, cmdline, 0, 0, 0, CREATE_SUSPENDED, 0, 0,
+			&startinfo, &info)) {
+		die(100, L"Couldn't start subprocess");
+	}
+	// avoid any possible thunky weirdness using GPA rather than &LoadLibraryW
+	void *k32 = GetModuleHandleW(L"kernel32.dll");
+	if (!k32) {
+		killdie(info.hProcess,
+				L"Couldn't get kernel32 module; everything is on fire!");
+	}
+	void *lladdr = (void *)GetProcAddress(k32, "LoadLibraryW");
+	if (!lladdr) killdie(info.hProcess, L"Couldn't find LoadLibraryW");
+	// copy our own path into the child so LoadLibraryW can be called on it
+	int namebytes = (namelen + 1) * sizeof(*name);
+	void *rmem = VirtualAllocEx(info.hProcess, 0, namebytes,
+			MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+	if (!rmem) killdie(info.hProcess, L"Couldn't allocate memory in subprocess");
+	if (!WriteProcessMemory(info.hProcess, rmem, name, namebytes, 0)) {
+		killdie(info.hProcess, L"Couldn't write to memory in subprocess");
+	}
+	void *rdll = rpc(info.hProcess, lladdr, rmem,
+			L"Couldn't call LoadLibrary in subprocess");
+	if (!rdll) {
+		killdie(info.hProcess,
+				L"LoadLibrary call in subprocess returned an error");
+	}
+	// MEM_RELEASE requires a size of 0, otherwise the call fails
+	VirtualFreeEx(info.hProcess, rmem, 0, MEM_RELEASE);
+	// injectentry will be at the same offset, just a different base
+	void *rfunc = (char *)rdll + ((char *)&injectedentry - (char *)&__ImageBase);
+	// Fill out the "fake IAT" table and use WPM to copy it to the injected side
+	// of things. See fakeiat.h for more exposition.
+#define PUTIAT(f) IAT.f = (_iat_##f##_func)GetProcAddress(k32, #f)
+	PUTIAT(GetSystemInfo);
+	PUTIAT(FlushInstructionCache);
+	PUTIAT(VirtualProtect);
+#undef PUTIAT
+	if (!IAT.GetSystemInfo || !IAT.FlushInstructionCache ||
+			!IAT.VirtualProtect) {
+		killdie(info.hProcess, L"Couldn't find required kernel32 functions");
+	}
+	void *riat = (char *)rdll + ((char *)&IAT - (char *)&__ImageBase);
+	if (!WriteProcessMemory(info.hProcess, riat, &IAT, sizeof(IAT), 0)) {
+		killdie(info.hProcess, L"Couldn't write to memory in subprocess");
+	}
+	if (!rpc(info.hProcess, rfunc, 0,
+			L"Couldn't call injected entry point in subprocess")) {
+		killdie(info.hProcess, L"Injected code failed to hook GetSystemInfo");
+	}
+	ResumeThread(info.hThread);
+	CloseHandle(info.hThread);
+	WaitForSingleObject(info.hProcess, INFINITE);
+	ulong status = 100; // reported if the child's exit code can't be read
+	GetExitCodeProcess(info.hProcess, &status);
+	ExitProcess(status);
+}
diff --git a/src/x86.c b/src/x86.c
new file mode 100644
index 0000000..7a5d00e
--- /dev/null
+++ b/src/x86.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "intdefs.h"
+#include "x86.h"
+
+// Returns the number of bytes taken up by the ModRM byte at p together with
+// any SIB byte and displacement that follow it (so always at least 1).
+// addrlen is the effective address size in bytes: 4, or 2 when the
+// address-size prefix is present.
+static int mrmsib(const uchar *p, int addrlen) {
+	// I won't lie: I thought I almost understood this, but after Bill walked me
+	// through correcting a bunch of wrong cases I now realise that I don't
+	// really understand it at all. If it helps, I used this as a reference:
+	// https://github.com/Nomade040/length-disassembler/blob/e8b34546/ldisasm.cpp#L14
+	// But it's confusingly-written enough that the code I wrote before didn't
+	// work, so with any luck nobody will need to refer to it again and this is
+	// actually correct now. Fingers crossed.
+	if (addrlen == 4 || *p & 0xC0) {
+		// a SIB byte only exists with 32-bit addressing, mod != 3 and r/m == 4
+		int sib = addrlen == 4 && *p < 0xC0 && (*p & 7) == 4;
+		switch (*p & 0xC0) {
+			// disp8
+			case 0x40: return 2 + sib;
+			// disp16/32
+			case 0:
+				if ((*p & 7) != 5) {
+					// disp8/32 via SIB
+					if (sib && (p[1] & 7) == 5) return *p & 0x40 ? 3 : 6;
+					return 1 + sib;
+				}
+				// fallthrough: mod == 0 with r/m == 5 is a bare disp32
+			case 0x80: return 1 + addrlen + sib;
+		}
+	}
+	// 16-bit addressing with mod == 0 and r/m == 6 is a bare disp16, whatever
+	// the reg field holds - so mask reg out instead of matching only 0x26
+	if (addrlen == 2 && (*p & 0xC7) == 0x06) return 3;
+	return 1; // note: include the mrm itself in the byte count
+}
+
+// Returns the total length in bytes, prefixes included, of the instruction at
+// insn_, or -1 if its opcode isn't in the tables used below (this includes all
+// 3-byte and 3DNow! opcodes) or if it has 14 or more prefix bytes.
+int x86_len(const void *insn_) {
+// turns each X(name, value) entry of an opcode table into a `case name:` label
+#define CASES(name, _) case name:
+ const uchar *insn = insn_;
+ int pfxlen = 0, addrlen = 4, operandlen = 4;
+
+ // consume prefix bytes one at a time, noting the size overrides as we go
+p: switch (*insn) {
+ case X86_PFX_ADSZ: addrlen = 2; goto P; // bit dumb sorry
+ case X86_PFX_OPSZ: operandlen = 2; // falls through to the shared prefix code
+P: X86_SEG_PREFIXES(CASES)
+ case X86_PFX_LOCK: case X86_PFX_REPN: case X86_PFX_REP:
+ // instruction can only be 15 bytes. this could go over, oh well,
+ // just don't want to loop for 8 million years
+ if (++pfxlen == 14) return -1;
+ ++insn;
+ goto p;
+ }
+
+ // insn now points at the opcode byte. each table's case labels are followed
+ // by the length calculation for that class of instruction; the I8 tables
+ // just force operandlen to 1 and fall through into the word-sized variant
+ switch (*insn) {
+ X86_OPS_1BYTE_NO(CASES) return pfxlen + 1;
+ X86_OPS_1BYTE_I8(CASES) operandlen = 1;
+ X86_OPS_1BYTE_IW(CASES) return pfxlen + 1 + operandlen;
+ X86_OPS_1BYTE_I16(CASES) return pfxlen + 3;
+ X86_OPS_1BYTE_MRM(CASES) return pfxlen + 1 + mrmsib(insn + 1, addrlen);
+ X86_OPS_1BYTE_MRM_I8(CASES) operandlen = 1;
+ X86_OPS_1BYTE_MRM_IW(CASES)
+ return pfxlen + 1 + operandlen + mrmsib(insn + 1, addrlen);
+ // opcode + imm16 + imm8
+ case X86_ENTER: return pfxlen + 4;
+ case X86_CRAZY8: operandlen = 1;
+ case X86_CRAZYW:
+ // the immediate is only present when the ModRM reg field is below 2
+ if ((insn[1] & 0x38) >= 0x10) operandlen = 0;
+ return pfxlen + 1 + operandlen + mrmsib(insn + 1, addrlen);
+ case X86_2BYTE: ++insn; goto b2;
+ }
+ return -1;
+
+ // insn now points at the second opcode byte, hence the `+ 2` in every length
+b2: switch (*insn) {
+ // we don't support any 3 byte ops for now, implement if ever needed...
+ case X86_3BYTE1: case X86_3BYTE2: case X86_3DNOW: return -1;
+ X86_OPS_2BYTE_NO(CASES) return pfxlen + 2;
+ X86_OPS_2BYTE_IW(CASES) return pfxlen + 2 + operandlen;
+ X86_OPS_2BYTE_MRM(CASES) return pfxlen + 2 + mrmsib(insn + 1, addrlen);
+ X86_OPS_2BYTE_MRM_I8(CASES) operandlen = 1;
+ return pfxlen + 2 + operandlen + mrmsib(insn + 1, addrlen);
+ }
+
+ return -1;
+#undef CASES
+}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/x86.h b/src/x86.h
new file mode 100644
index 0000000..197d4c3
--- /dev/null
+++ b/src/x86.h
@@ -0,0 +1,565 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef INC_X86_H
+#define INC_X86_H
+
+/*
+ * Opcode-based X86 instruction analysis. In other words, *NOT* a disassembler.
+ * Only cares about the instructions we expect to see in basic 32-bit userspace
+ * functions; there's no kernel-mode instructions, no SSE 3+, no AVX, no REX,
+ * EVEX, yadda yadda.
+ */
+
+// XXX: no BOUND (0x62): ambiguous with EVEX prefix - can't be arsed!
+
+/* Instruction prefixes: segments */
+#define X86_SEG_PREFIXES(X) \
+ X(X86_PFX_ES, 0x26) \
+ X(X86_PFX_CS, 0x2E) \
+ X(X86_PFX_SS, 0x36) \
+ X(X86_PFX_DS, 0x3E) \
+ X(X86_PFX_FS, 0x64) \
+ X(X86_PFX_GS, 0x65)
+
+/* Instruction prefixes: operations */
+#define X86_OP_PREFIXES(X) \
+ X(X86_PFX_OPSZ, 0x66) \
+ X(X86_PFX_ADSZ, 0x67) \
+ X(X86_PFX_LOCK, 0xF0) \
+ X(X86_PFX_REPN, 0xF2) \
+ X(X86_PFX_REP, 0xF3)
+
+/* All instruction prefixes */
+#define X86_PREFIXES(X) X86_SEG_PREFIXES(X) X86_OP_PREFIXES(X)
+
+/*
+ * Single-byte opcodes with no operands. Each entry is X(name, opcode byte).
+ * Note that the short jump (0xEB) does NOT belong here: it is followed by a
+ * 1-byte displacement and is therefore listed with the 1-byte immediates.
+ */
+#define X86_OPS_1BYTE_NO(X) \
+	X(X86_PUSHES, 0x06) \
+	X(X86_POPES, 0x07) \
+	X(X86_PUSHCS, 0x0E) \
+	X(X86_PUSHSS, 0x16) \
+	X(X86_POPSS, 0x17) \
+	X(X86_PUSHDS, 0x1E) \
+	X(X86_POPDS, 0x1F) \
+	X(X86_DAA, 0x27) \
+	X(X86_DAS, 0x2F) \
+	X(X86_AAA, 0x37) \
+	X(X86_AAS, 0x3F) \
+	X(X86_INCEAX, 0x40) \
+	X(X86_INCECX, 0x41) \
+	X(X86_INCEDX, 0x42) \
+	X(X86_INCEBX, 0x43) \
+	X(X86_INCESP, 0x44) \
+	X(X86_INCEBP, 0x45) \
+	X(X86_INCESI, 0x46) \
+	X(X86_INCEDI, 0x47) \
+	X(X86_DECEAX, 0x48) \
+	X(X86_DECECX, 0x49) \
+	X(X86_DECEDX, 0x4A) \
+	X(X86_DECEBX, 0x4B) \
+	X(X86_DECESP, 0x4C) \
+	X(X86_DECEBP, 0x4D) \
+	X(X86_DECESI, 0x4E) \
+	X(X86_DECEDI, 0x4F) \
+	X(X86_PUSHEAX, 0x50) \
+	X(X86_PUSHECX, 0x51) \
+	X(X86_PUSHEDX, 0x52) \
+	X(X86_PUSHEBX, 0x53) \
+	X(X86_PUSHESP, 0x54) \
+	X(X86_PUSHEBP, 0x55) \
+	X(X86_PUSHESI, 0x56) \
+	X(X86_PUSHEDI, 0x57) \
+	X(X86_POPEAX, 0x58) \
+	X(X86_POPECX, 0x59) \
+	X(X86_POPEDX, 0x5A) \
+	X(X86_POPEBX, 0x5B) \
+	X(X86_POPESP, 0x5C) \
+	X(X86_POPEBP, 0x5D) \
+	X(X86_POPESI, 0x5E) \
+	X(X86_POPEDI, 0x5F) \
+	X(X86_PUSHA, 0x60) \
+	X(X86_POPA, 0x61) \
+	X(X86_NOP, 0x90) \
+	X(X86_XCHGECXEAX, 0x91) \
+	X(X86_XCHGEDXEAX, 0x92) \
+	X(X86_XCHGEBXEAX, 0x93) \
+	X(X86_XCHGESPEAX, 0x94) \
+	X(X86_XCHGEBPEAX, 0x95) \
+	X(X86_XCHGESIEAX, 0x96) \
+	X(X86_XCHGEDIEAX, 0x97) \
+	X(X86_CWDE, 0x98) \
+	X(X86_CDQ, 0x99) \
+	X(X86_WAIT, 0x9B) \
+	X(X86_PUSHF, 0x9C) \
+	X(X86_POPF, 0x9D) \
+	X(X86_SAHF, 0x9E) \
+	X(X86_LAHF, 0x9F) \
+	X(X86_MOVS8, 0xA4) \
+	X(X86_MOVSW, 0xA5) \
+	X(X86_CMPS8, 0xA6) \
+	X(X86_CMPSW, 0xA7) \
+	X(X86_STOS8, 0xAA) \
+	X(X86_STOSD, 0xAB) \
+	X(X86_LODS8, 0xAC) \
+	X(X86_LODSD, 0xAD) \
+	X(X86_SCAS8, 0xAE) \
+	X(X86_SCASD, 0xAF) \
+	X(X86_RET, 0xC3) \
+	X(X86_LEAVE, 0xC9) \
+	X(X86_RETF, 0xCB) \
+	X(X86_INT3, 0xCC) \
+	X(X86_INTO, 0xCE) \
+	X(X86_XLAT, 0xD7) \
+	X(X86_CMC, 0xF5) \
+	X(X86_CLC, 0xF8) \
+	X(X86_STC, 0xF9) \
+	X(X86_CLI, 0xFA) \
+	X(X86_STI, 0xFB) \
+	X(X86_CLD, 0xFC) \
+	X(X86_STD, 0xFD)
+
+/*
+ * Single-byte opcodes with a 1-byte immediate operand (this includes the
+ * 1-byte displacement of short jumps and loops).
+ * NOTE(review): per the Intel SDM, 0xA0/0xA2 (MOV AL <-> moffs8) are followed
+ * by an address-sized offset rather than an imm8, so they probably don't belong
+ * in this list - confirm and fix together with the length code in x86.c.
+ */
+#define X86_OPS_1BYTE_I8(X) \
+	X(X86_ADDALI, 0x04) \
+	X(X86_ORALI, 0x0C) \
+	X(X86_ADCALI, 0x14) \
+	X(X86_SBBALI, 0x1C) \
+	X(X86_ANDALI, 0x24) \
+	X(X86_SUBALI, 0x2C) \
+	X(X86_XORALI, 0x34) \
+	X(X86_CMPALI, 0x3C) \
+	X(X86_PUSHI8, 0x6A) \
+	X(X86_MOVALII, 0xA0) /* From offset (indirect) */ \
+	X(X86_MOVIIAL, 0xA2) /* To offset (indirect) */ \
+	X(X86_TESTALI, 0xA8) \
+	X(X86_JO, 0x70) \
+	X(X86_JNO, 0x71) \
+	X(X86_JB, 0x72) /* AKA JC */ \
+	X(X86_JNB, 0x73) /* AKA JNC */ \
+	X(X86_JZ, 0x74) /* AKA JE */ \
+	X(X86_JNZ, 0x75) /* AKA JNE */ \
+	X(X86_JNA, 0x76) /* AKA JBE */ \
+	X(X86_JA, 0x77) /* AKA JNBE */ \
+	X(X86_JS, 0x78) \
+	X(X86_JNS, 0x79) \
+	X(X86_JP, 0x7A) \
+	X(X86_JNP, 0x7B) \
+	X(X86_JL, 0x7C) /* AKA JNGE */ \
+	X(X86_JNL, 0x7D) /* AKA JGE */ \
+	X(X86_JNG, 0x7E) /* AKA JLE */ \
+	X(X86_JG, 0x7F) /* AKA JNLE */ \
+	X(X86_MOVALI, 0xB0) \
+	X(X86_MOVCLI, 0xB1) \
+	X(X86_MOVDLI, 0xB2) \
+	X(X86_MOVBLI, 0xB3) \
+	X(X86_MOVAHI, 0xB4) \
+	X(X86_MOVCHI, 0xB5) \
+	X(X86_MOVDHI, 0xB6) \
+	X(X86_MOVBHI, 0xB7) \
+	X(X86_INT, 0xCD) \
+	X(X86_AMX, 0xD4) /* Note: D4 0A is referred to as AAM */ \
+	X(X86_ADX, 0xD5) /* Note: D5 0A is referred to as AAD */ \
+	X(X86_LOOPNZ, 0xE0) /* AKA LOOPNE */ \
+	X(X86_LOOPZ, 0xE1) /* AKA LOOPE */ \
+	X(X86_LOOP, 0xE2) \
+	X(X86_JCXZ, 0xE3) \
+	X(X86_JMPI8, 0xEB) /* Short jump: opcode + 1-byte displacement */
+
+/*
+ * Single-byte opcodes with a word-sized immediate operand. "Word" here means
+ * the current operand size, i.e. 4 bytes unless the operand-size prefix (0x66)
+ * is present. Each entry is X(name, opcode byte).
+ * NOTE(review): per the Intel SDM, 0xA1/0xA3 (MOV EAX <-> moffs) are followed
+ * by an offset whose size follows the ADDRESS-size prefix instead - confirm
+ * whether they need separate handling.
+ */
+#define X86_OPS_1BYTE_IW(X) \
+ X(X86_ADDEAXI, 0x05) \
+ X(X86_OREAXI, 0x0D) \
+ X(X86_ADCEAXI, 0x15) \
+ X(X86_SBBEAXI, 0x1D) \
+ X(X86_ANDEAXI, 0x25) \
+ X(X86_SUBEAXI, 0x2D) \
+ X(X86_XOREAXI, 0x35) \
+ X(X86_CMPEAXI, 0x3D) \
+ X(X86_PUSHIW, 0x68) \
+ X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \
+ X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \
+ X(X86_TESTEAXI, 0xA9) \
+ X(X86_MOVEAXI, 0xB8) \
+ X(X86_MOVECXI, 0xB9) \
+ X(X86_MOVEDXI, 0xBA) \
+ X(X86_MOVEBXI, 0xBB) \
+ X(X86_MOVESPI, 0xBC) \
+ X(X86_MOVEBPI, 0xBD) \
+ X(X86_MOVESII, 0xBE) \
+ X(X86_MOVEDII, 0xBF) \
+ X(X86_CALL, 0xE8) \
+ X(X86_JMPIW, 0xE9)
+
+/* Single-byte opcodes with 16-bit immediate operands, regardless of prefixes */
+#define X86_OPS_1BYTE_I16(X) \
+ X(X86_RETI16, 0xC2) \
+ X(X86_RETFI16, 0xCA)
+
+/*
+ * Single-byte opcodes followed by a ModRM byte and no immediate. `MR` in a name
+ * means the ModRM operand is the destination, `RM` means it's the source. A
+ * trailing `8` marks the byte-sized form of an operation and `W` the word-sized
+ * form.
+ */
+#define X86_OPS_1BYTE_MRM(X) \
+ X(X86_ADDMR8, 0x00) \
+ X(X86_ADDMRW, 0x01) \
+ X(X86_ADDRM8, 0x02) \
+ X(X86_ADDRMW, 0x03) \
+ X(X86_ORMR8, 0x08) \
+ X(X86_ORMRW, 0x09) \
+ X(X86_ORRM8, 0x0A) \
+ X(X86_ORRMW, 0x0B) \
+ X(X86_ADCMR8, 0x10) \
+ X(X86_ADCMRW, 0x11) \
+ X(X86_ADCRM8, 0x12) \
+ X(X86_ADCRMW, 0x13) \
+ X(X86_SBBMR8, 0x18) \
+ X(X86_SBBMRW, 0x19) \
+ X(X86_SBBRM8, 0x1A) \
+ X(X86_SBBRMW, 0x1B) \
+ X(X86_ANDMR8, 0x20) \
+ X(X86_ANDMRW, 0x21) \
+ X(X86_ANDRM8, 0x22) \
+ X(X86_ANDRMW, 0x23) \
+ X(X86_SUBMR8, 0x28) \
+ X(X86_SUBMRW, 0x29) \
+ X(X86_SUBRM8, 0x2A) \
+ X(X86_SUBRMW, 0x2B) \
+ X(X86_XORMR8, 0x30) \
+ X(X86_XORMRW, 0x31) \
+ X(X86_XORRM8, 0x32) \
+ X(X86_XORRMW, 0x33) \
+ X(X86_CMPMR8, 0x38) \
+ X(X86_CMPMRW, 0x39) \
+ X(X86_CMPRM8, 0x3A) \
+ X(X86_CMPRMW, 0x3B) \
+ X(X86_ARPL, 0x63) \
+ X(X86_TESTMR8, 0x84) \
+ X(X86_TESTMRW, 0x85) \
+ X(X86_XCHGMR8, 0x86) \
+ X(X86_XCHGMRW, 0x87) \
+ X(X86_MOVMR8, 0x88) \
+ X(X86_MOVMRW, 0x89) \
+ X(X86_MOVRM8, 0x8A) \
+ X(X86_MOVRMW, 0x8B) \
+ X(X86_MOVMS, 0x8C) /* Copy segment register (16-bit) to ModRM operand */ \
+ X(X86_LEA, 0x8D) \
+ X(X86_MOVSM, 0x8E) /* Copy ModRM operand to segment register (16-bit) */ \
+ X(X86_POPM, 0x8F) \
+ X(X86_LES, 0xC4) \
+ X(X86_LDS, 0xC5) \
+ X(X86_SHIFTM18, 0xD0) /* Shift/roll by 1 place */ \
+ X(X86_SHIFTM1W, 0xD1) /* Shift/roll by 1 place */ \
+ X(X86_SHIFTMCL8, 0xD2) /* Shift/roll by CL places */ \
+ X(X86_SHIFTMCLW, 0xD3) /* Shift/roll by CL places */ \
+ X(X86_FLTBLK1, 0xD8) /* Various float ops (1/8) */ \
+ X(X86_FLTBLK2, 0xD9) /* Various float ops (2/8) */ \
+ X(X86_FLTBLK3, 0xDA) /* Various float ops (3/8) */ \
+ X(X86_FLTBLK4, 0xDB) /* Various float ops (4/8) */ \
+ X(X86_FLTBLK5, 0xDC) /* Various float ops (5/8) */ \
+ X(X86_FLTBLK6, 0xDD) /* Various float ops (6/8) */ \
+ X(X86_FLTBLK7, 0xDE) /* Various float ops (7/8) */ \
+ X(X86_FLTBLK8, 0xDF) /* Various float ops (8/8) */ \
+ X(X86_MISCM8, 0xFE) /* INC/DEC; only documented for MRM.reg in {0, 1} */ \
+ X(X86_MISCMW, 0xFF) /* INC/DEC/CALL/JMP/PUSH selected by MRM.reg */
+
+/*
+ * Single-byte opcodes with a ModRM and a 1-byte immediate operand. The final
+ * entry carries no line continuation, so the macro ends with the list rather
+ * than depending on a blank line following it.
+ */
+#define X86_OPS_1BYTE_MRM_I8(X) \
+ X(X86_IMULMI8, 0x6B) /* 3-operand multiply */ \
+ X(X86_ALUMI8, 0x80) /* ALU op in MRM.reg, from immediate */ \
+ X(X86_ALUMI8X, 0x82) /* ALU op in MRM.reg, from immediate, redundant?? */ \
+ X(X86_ALUMI8S, 0x83) /* ALU op in MRM.reg, from immediate, sign-extend */ \
+ X(X86_SHIFTMI8, 0xC0) /* Shift/roll by imm8 places */ \
+ X(X86_SHIFTMIW, 0xC1) /* Shift/roll by imm8 places */ \
+ X(X86_MOVMI8, 0xC6) /* Note: MRM.reg must be 0 */
+
+/*
+ * Single-byte opcodes with a ModRM and a word-sized immediate operand.
+ * NOTE(review): "word-sized" presumably means imm32, or imm16 under an
+ * operand-size prefix (cf. the "imm32/16" remark on X86_CRAZYW) - confirm.
+ */
+#define X86_OPS_1BYTE_MRM_IW(X) \
+ X(X86_IMULMIW, 0x69) /* 3-operand multiply */ \
+ X(X86_ALUMIW, 0x81) /* ALU op in MRM.reg, from immediate */ \
+ X(X86_MOVMIW, 0xC7) /* Note: MRM.reg must be 0 */
+
+/*
+ * All single-byte opcodes in these tables: every operand-shape group, followed
+ * by three opcodes whose operand layout fits none of the groups.
+ */
+#define X86_OPS_1BYTE(X) \
+ X86_OPS_1BYTE_NO(X) \
+ X86_OPS_1BYTE_I8(X) \
+ X86_OPS_1BYTE_IW(X) \
+ X86_OPS_1BYTE_I16(X) \
+ X86_OPS_1BYTE_MRM(X) \
+ X86_OPS_1BYTE_MRM_I8(X) \
+ X86_OPS_1BYTE_MRM_IW(X) \
+ X(X86_ENTER, 0xC8) /* Dumb special case insn: imm16 followed by imm8 */ \
+ X(X86_CRAZY8, 0xF6) /* CRAZY reg-encoded block, has imm8 IFF reg < 2 */ \
+ X(X86_CRAZYW, 0xF7) /* CRAZY reg-encoded block, has imm32/16 IFF reg < 2 */
+
+/*
+ * Second bytes of opcodes with no operand bytes following them. The BSWAP
+ * entries encode their register in the opcode byte itself.
+ */
+#define X86_OPS_2BYTE_NO(X) \
+ X(X86_2B_RDTSC, 0x31) \
+ X(X86_2B_RDPMD, 0x33) /* NOTE(review): 0F 33 is RDPMC; name looks like a typo */ \
+ X(X86_2B_SYSENTER, 0x34) \
+ X(X86_2B_PUSHFS, 0xA0) \
+ X(X86_2B_POPFS, 0xA1) \
+ X(X86_2B_CPUID, 0xA2) \
+ X(X86_2B_PUSHGS, 0xA8) \
+ X(X86_2B_POPGS, 0xA9) \
+ X(X86_2B_RSM, 0xAA) \
+ X(X86_2B_BSWAPEAX, 0xC8) \
+ X(X86_2B_BSWAPECX, 0xC9) \
+ X(X86_2B_BSWAPEDX, 0xCA) \
+ X(X86_2B_BSWAPEBX, 0xCB) \
+ X(X86_2B_BSWAPESP, 0xCC) \
+ X(X86_2B_BSWAPEBP, 0xCD) \
+ X(X86_2B_BSWAPESI, 0xCE) \
+ X(X86_2B_BSWAPEDI, 0xCF) \
+ /* MMX instruction */ \
+ X(X86_2B_EMMS, 0x77)
+
+/*
+ * Second bytes of opcodes with a word-sized immediate operand. These are the
+ * near conditional jumps; their immediate is a displacement relative to the
+ * end of the instruction, not an indirect address.
+ */
+#define X86_OPS_2BYTE_IW(X) \
+ X(X86_2B_JOII, 0x80) /* Relative displacement */ \
+ X(X86_2B_JNOII, 0x81) /* Relative displacement */ \
+ X(X86_2B_JBII, 0x82) /* AKA JC; relative displacement */ \
+ X(X86_2B_JNBII, 0x83) /* AKA JNC; relative displacement */ \
+ X(X86_2B_JZII, 0x84) /* AKA JE; relative displacement */ \
+ X(X86_2B_JNZII, 0x85) /* AKA JNE; relative displacement */ \
+ X(X86_2B_JNAII, 0x86) /* AKA JBE; relative displacement */ \
+ X(X86_2B_JAII, 0x87) /* AKA JNBE; relative displacement */ \
+ X(X86_2B_JSII, 0x88) /* Relative displacement */ \
+ X(X86_2B_JNSII, 0x89) /* Relative displacement */ \
+ X(X86_2B_JPII, 0x8A) /* Relative displacement */ \
+ X(X86_2B_JNPII, 0x8B) /* Relative displacement */ \
+ X(X86_2B_JLII, 0x8C) /* AKA JNGE; relative displacement */ \
+ X(X86_2B_JNLII, 0x8D) /* AKA JGE; relative displacement */ \
+ X(X86_2B_JNGII, 0x8E) /* AKA JLE; relative displacement */ \
+ X(X86_2B_JGII, 0x8F) /* AKA JNLE; relative displacement */
+
+/*
+ * Second bytes of opcodes with a ModRM and no immediate. Several of the SSE
+ * entries stand for a family of instructions selected by a prefix, as noted on
+ * the individual lines.
+ */
+#define X86_OPS_2BYTE_MRM(X) \
+ /* NOTE(review): the documented multi-byte NOP is 0F 1F /0, which falls under
+ X86_2B_HINTS8 here; 0F 0D is the PREFETCH/PREFETCHW group - confirm that
+ the name and comment on the next entry are intended. */ \
+ X(X86_2B_NOP, 0x0D) /* Variable length NOP (3-9 with prefix) */ \
+ X(X86_2B_HINTS1, 0x18) /* Prefetch and hint-nop block 1/8 */ \
+ X(X86_2B_HINTS2, 0x19) /* Prefetch and hint-nop block 2/8 */ \
+ X(X86_2B_HINTS3, 0x1A) /* Prefetch and hint-nop block 3/8 */ \
+ X(X86_2B_HINTS4, 0x1B) /* Prefetch and hint-nop block 4/8 */ \
+ X(X86_2B_HINTS5, 0x1C) /* Prefetch and hint-nop block 5/8 */ \
+ X(X86_2B_HINTS6, 0x1D) /* Prefetch and hint-nop block 6/8 */ \
+ X(X86_2B_HINTS7, 0x1E) /* Prefetch and hint-nop block 7/8 */ \
+ X(X86_2B_HINTS8, 0x1F) /* Prefetch and hint-nop block 8/8 */ \
+ X(X86_2B_CMOVO, 0x40) \
+ X(X86_2B_CMOVNO, 0x41) \
+ X(X86_2B_CMOVB, 0x42) /* AKA CMOVC */ \
+ X(X86_2B_CMOVNB, 0x43) /* AKA CMOVNC */ \
+ X(X86_2B_CMOVZ, 0x44) /* AKA CMOVE */ \
+ X(X86_2B_CMOVNZ, 0x45) /* AKA CMOVNE */ \
+ X(X86_2B_CMOVNA, 0x46) /* AKA CMOVBE */ \
+ X(X86_2B_CMOVA, 0x47) /* AKA CMOVNBE */ \
+ X(X86_2B_CMOVS, 0x48) \
+ X(X86_2B_CMOVNS, 0x49) \
+ X(X86_2B_CMOVP, 0x4A) \
+ X(X86_2B_CMOVNP, 0x4B) \
+ X(X86_2B_CMOVL, 0x4C) /* AKA CMOVNGE */ \
+ X(X86_2B_CMOVNL, 0x4D) /* AKA CMOVGE */ \
+ X(X86_2B_CMOVNG, 0x4E) /* AKA CMOVLE */ \
+ X(X86_2B_CMOVG, 0x4F) /* AKA CMOVNLE */ \
+ X(X86_2B_SETO, 0x90) \
+ X(X86_2B_SETNO, 0x91) \
+ X(X86_2B_SETB, 0x92) /* AKA SETC */ \
+ X(X86_2B_SETNB, 0x93) /* AKA SETNC */ \
+ X(X86_2B_SETZ, 0x94) /* AKA SETE */ \
+ X(X86_2B_SETNZ, 0x95) /* AKA SETNE */ \
+ X(X86_2B_SETNA, 0x96) /* AKA SETBE */ \
+ X(X86_2B_SETA, 0x97) /* AKA SETNBE */ \
+ X(X86_2B_SETS, 0x98) \
+ X(X86_2B_SETNS, 0x99) \
+ X(X86_2B_SETP, 0x9A) \
+ X(X86_2B_SETNP, 0x9B) \
+ X(X86_2B_SETL, 0x9C) /* AKA SETNGE */ \
+ X(X86_2B_SETNL, 0x9D) /* AKA SETGE */ \
+ X(X86_2B_SETNG, 0x9E) /* AKA SETLE */ \
+ X(X86_2B_SETG, 0x9F) /* AKA SETNLE */ \
+ X(X86_2B_BTMR, 0xA3) \
+ X(X86_2B_SHLDMRCL, 0xA5) \
+ X(X86_2B_BTS, 0xAB) /* NOTE(review): BTR (0F B3) has no entry - intended? */ \
+ X(X86_2B_SHRDMRCL, 0xAD) \
+ X(X86_2B_MISC, 0xAE) /* Float env stuff, memory fences */ \
+ X(X86_2B_IMUL, 0xAF) \
+ X(X86_2B_CMPXCHG8, 0xB0) \
+ X(X86_2B_CMPXCHGW, 0xB1) \
+ X(X86_2B_MOVZX8, 0xB6) \
+ X(X86_2B_MOVZXW, 0xB7) \
+ X(X86_2B_POPCNT, 0xB8) \
+ X(X86_2B_BTCRM, 0xBB) \
+ X(X86_2B_BSF, 0xBC) \
+ X(X86_2B_BSR, 0xBD) \
+ X(X86_2B_MOVSX8, 0xBE) \
+ X(X86_2B_MOVSXW, 0xBF) \
+ X(X86_2B_XADDRM8, 0xC0) \
+ X(X86_2B_XADDRMW, 0xC1) \
+ /* NOTE: this one is actually a block with some VMX stuff too; it's only
+ CMPXCHG64 (CMPXCHG8B if you prefer) if MRM.reg = 1, but naming it this
+ way seemed more useful since it's what you'll see in normal userspace
+ programs, which is what we're interested in. */ \
+ X(X86_2B_CMPXCHG64, 0xC7) \
+ /* -- MMX/SSE1/SSE2 instructions -- */ \
+ /* XXX: some of the naming here isn't totally perfect */ \
+ X(X86_2B_MOVRM128, 0x10) /* MOVUPS/MOVSS/MOVUPD/MOVSD via prefix */ \
+ X(X86_2B_MOVMR128, 0x11) /* MOVUPS/MOVSS/MOVUPD/MOVSD via prefix */ \
+ X(X86_2B_MOVLRM, 0x12) /* MOV(H)LPS/MOVLPD/MOVDDUP/MOVSLDUP */ \
+ X(X86_2B_MOVLMR, 0x13) /* MOVLP{S,D} */ \
+ X(X86_2B_UNPCKL, 0x14) /* UNPCKLP{S,D} */ \
+ X(X86_2B_UNPCKH, 0x15) /* UNPCKHP{S,D} */ \
+ X(X86_2B_MOVHRM, 0x16) /* MOV(L)HPS/MOVHPD/MOVSHDUP */ \
+ X(X86_2B_MOVHMR, 0x17) /* MOVHP{S,D} */ \
+ X(X86_2B_MOVARM, 0x28) /* MOVAP{S,D} via prefix */ \
+ X(X86_2B_MOVAMR, 0x29) /* MOVAP{S,D} via prefix */ \
+ X(X86_2B_CVTIF64, 0x2A) /* CVTxI2x{S,D} */ \
+ X(X86_2B_MOVNT, 0x2B) /* MOVNTP{S,D} */ \
+ X(X86_2B_CVTFT64, 0x2C) /* CVTTx{S,D}2xI */ \
+ X(X86_2B_CVTFI64, 0x2D) /* CVTx{S,D}2xI */ \
+ X(X86_2B_UCOMI, 0x2E) /* UCOMIS{S,D} */ \
+ X(X86_2B_COMI, 0x2F) /* COMIS{S,D} */ \
+ X(X86_2B_MOVMSK, 0x50) /* MOVMSKP{S,D} */ \
+ X(X86_2B_SQRT, 0x51) /* SQRT{P,S}{S,D} */ \
+ X(X86_2B_RSQRT, 0x52) /* RSQRT{P,S}S */ \
+ X(X86_2B_RCP, 0x53) /* RCP{P,S}S */ \
+ X(X86_2B_AND, 0x54) /* ANDP{S,D} */ \
+ X(X86_2B_ANDN, 0x55) /* ANDNP{S,D} */ \
+ X(X86_2B_OR, 0x56) /* ORP{S,D} */ \
+ X(X86_2B_XOR, 0x57) /* XORP{S,D} */ \
+ X(X86_2B_ADD, 0x58) /* ADD{P,S}{S,D} */ \
+ X(X86_2B_MUL, 0x59) /* MUL{P,S}{S,D} */ \
+ X(X86_2B_CVTFF128, 0x5A) /* CVTxS2xD/CVTxD2xS */ \
+ X(X86_2B_CVTFI128, 0x5B) /* CVTDQ2PS/CVTPS2DQ/CVTTPS2DQ */ \
+ X(X86_2B_SUB, 0x5C) /* SUB{P,S}{S,D} */ \
+ X(X86_2B_DIV, 0x5D) /* DIV{P,S}{S,D} */ \
+ X(X86_2B_MIN, 0x5E) /* MIN{P,S}{S,D} */ \
+ X(X86_2B_MAX, 0x5F) /* MAX{P,S}{S,D} */ \
+ X(X86_2B_PUNPCKLBW, 0x60) \
+ X(X86_2B_PUNPCKLBD, 0x61) /* NOTE(review): 0F 61 is PUNPCKLWD; name typo? */ \
+ X(X86_2B_PUNPCKLDQ, 0x62) \
+ X(X86_2B_PACKSSWB, 0x63) \
+ X(X86_2B_PCMPGTB, 0x64) \
+ X(X86_2B_PCMPGTW, 0x65) \
+ X(X86_2B_PCMPGTD, 0x66) \
+ X(X86_2B_PACKUSWB, 0x67) \
+ X(X86_2B_PUNPCKHBW, 0x68) \
+ X(X86_2B_PUNPCKHWD, 0x69) \
+ X(X86_2B_PUNPCKHDQ, 0x6A) \
+ X(X86_2B_PACKSSDW, 0x6B) \
+ X(X86_2B_PUNPCKLQDQ, 0x6C) \
+ X(X86_2B_PUNPCKHQDQ, 0x6D) \
+ X(X86_2B_MOVDRM, 0x6E) \
+ X(X86_2B_MOVQRM, 0x6F) /* MOVQ/MOVDQA/MOVDQU */ \
+ X(X86_2B_PCMPEQB, 0x74) \
+ X(X86_2B_PCMPEQW, 0x75) \
+ X(X86_2B_PCMPEQD, 0x76) \
+ X(X86_2B_MOVDMR, 0x7E) \
+ X(X86_2B_MOVQMR, 0x7F) \
+ X(X86_2B_MOVNTI, 0xC3) \
+ X(X86_2B_ADDSUB, 0xD0) /* ADDSUBP{S,D} */ \
+ X(X86_2B_PSRLW, 0xD1) \
+ X(X86_2B_PSRLD, 0xD2) \
+ X(X86_2B_PSRLQ, 0xD3) \
+ X(X86_2B_PADDQ, 0xD4) \
+ X(X86_2B_PMULLW, 0xD5) \
+ X(X86_2B_MOVQRR, 0xD6) /* MOVQ(m,r)/MOVQ2DQ/MOVDQ2Q based on prefix */ \
+ X(X86_2B_PMOVMSKB, 0xD7) \
+ X(X86_2B_PSUBUSB, 0xD8) \
+ X(X86_2B_PSUBUSW, 0xD9) \
+ X(X86_2B_PMINUB, 0xDA) \
+ X(X86_2B_PAND, 0xDB) \
+ X(X86_2B_PADDUSB, 0xDC) \
+ X(X86_2B_PADDUSW, 0xDD) \
+ X(X86_2B_PMAXUB, 0xDE) \
+ X(X86_2B_PANDN, 0xDF) \
+ X(X86_2B_PAVGB, 0xE0) \
+ X(X86_2B_PSRAW, 0xE1) \
+ X(X86_2B_PSRAD, 0xE2) \
+ X(X86_2B_PAVGW, 0xE3) \
+ X(X86_2B_PMULHUW, 0xE4) \
+ X(X86_2B_PMULHW, 0xE5) \
+ X(X86_2B_CVTQ, 0xE6) /* CVTPD2DQ/CVTTPD2DQ/CVTDQ2PD */ \
+ X(X86_2B_MOVNTQ, 0xE7) \
+ X(X86_2B_PSUBSB, 0xE8) \
+ X(X86_2B_PSUBSW, 0xE9) \
+ X(X86_2B_PMINSB, 0xEA) \
+ X(X86_2B_PMINSW, 0xEB) \
+ X(X86_2B_PADDSB, 0xEC) \
+ X(X86_2B_PADDSW, 0xED) \
+ X(X86_2B_PMAXSW, 0xEE) \
+ X(X86_2B_PXOR, 0xEF) \
+ X(X86_2B_LDDQU, 0xF0) \
+ X(X86_2B_PSLLW, 0xF1) \
+ X(X86_2B_PSLLD, 0xF2) \
+ X(X86_2B_PSLLQ, 0xF3) \
+ X(X86_2B_PMULUDQ, 0xF4) \
+ X(X86_2B_PMADDWD, 0xF5) \
+ X(X86_2B_PSABDW, 0xF6) /* NOTE(review): 0F F6 is PSADBW; name typo? */ \
+ X(X86_2B_MASKMOVQ, 0xF7) \
+ X(X86_2B_PSUBB, 0xF8) \
+ X(X86_2B_PSUBW, 0xF9) \
+ X(X86_2B_PSUBD, 0xFA) \
+ X(X86_2B_PSUBQ, 0xFB) \
+ X(X86_2B_PADDB, 0xFC) \
+ X(X86_2B_PADDW, 0xFD) \
+ X(X86_2B_PADDD, 0xFE)
+
+/*
+ * Second bytes of opcodes with a ModRM and a 1-byte immediate operand. The
+ * final entry carries no line continuation, so the macro ends with the list
+ * rather than depending on a blank line following it.
+ */
+#define X86_OPS_2BYTE_MRM_I8(X) \
+ X(X86_2B_SHLDMRI, 0xA4) \
+ X(X86_2B_SHRDMRI, 0xAC) \
+ X(X86_2B_BTXMI, 0xBA) /* BT/BTS/BTR/BTC depending on MRM.reg (4-7) */ \
+ /* -- MMX/SSE1/SSE2 instructions -- */ \
+ X(X86_2B_PSHUF, 0x70) /* PSHUFW/PSHUFLW/PSHUFHW/PSHUFD via prefix */ \
+ X(X86_2B_PSWI, 0x71) /* PSRLW/PSRAW/PSLLW via MRM.reg */ \
+ X(X86_2B_PSDI, 0x72) /* PSRLD/PSRAD/PSLLD via MRM.reg */ \
+ X(X86_2B_PSQI, 0x73) /* PSRLQ/PSRLDQ/PSLLQ/PSLLDQ via MRM.reg */ \
+ X(X86_2B_CMPSI, 0xC2) /* CMP{P,S}{S,D} via prefix */ \
+ X(X86_2B_PINSRW, 0xC4) \
+ X(X86_2B_PEXTRW, 0xC5) \
+ X(X86_2B_SHUF, 0xC6) /* SHUFP{S,D} */
+
+/* All second opcode bytes in these tables, i.e. every X86_OPS_2BYTE_* group */
+#define X86_OPS_2BYTE(X) \
+ X86_OPS_2BYTE_NO(X) \
+ X86_OPS_2BYTE_IW(X) \
+ X86_OPS_2BYTE_MRM(X) \
+ X86_OPS_2BYTE_MRM_I8(X)
+
+/*
+ * Enum constants for every prefix and opcode byte in the tables, generated by
+ * expanding each table entry to `name = value,`. The X86_2B_* values are second
+ * opcode bytes (following X86_2BYTE), so they may share numeric values with
+ * single-byte opcodes. The helper macro is local to this definition and its
+ * name stays out of the namespace reserved for the implementation.
+ */
+#define X86_ENUM_ENTRY_(name, value) name = value,
+enum {
+ X86_PREFIXES(X86_ENUM_ENTRY_)
+ X86_OPS_1BYTE(X86_ENUM_ENTRY_)
+ X86_2BYTE = 0x0F, /* First byte of a 2- or 3-byte opcode */
+ X86_OPS_2BYTE(X86_ENUM_ENTRY_)
+ X86_3BYTE1 = 0x38, /* One of the two second bytes of a three-byte opcode */
+ X86_3BYTE2 = 0x3A, /* The other second byte of a three-byte opcode */
+ X86_3DNOW = 0x0F /* The second byte of a three-byte 3DNow! opcode */
+};
+#undef X86_ENUM_ENTRY_
+
+/*
+ * Returns the length of the instruction starting at insn, or -1 if it's a
+ * "known unknown" or invalid instruction. Doesn't handle unknown unknowns: may
+ * explode or hang on arbitrary untrusted data, so only point it at code that is
+ * trusted to be well-formed. Also doesn't handle, among other things, 3DNow!,
+ * SSE, MMX, AVX, and such. Aims to be small and fast rather than comprehensive.
+ * NOTE(review): the length is presumably a byte count that includes any
+ * prefixes - confirm against the implementation.
+ */
+int x86_len(const void *insn);
+
+/*
+ * Constructs a ModRM byte from its three fields: mod is shifted into bits 7-6,
+ * reg into bits 5-3, and rm occupies the low bits. Assumes the parameters are
+ * all in range; out-of-range values are not masked. Every argument and the
+ * whole expansion are parenthesised, so arbitrary expressions (e.g. a
+ * conditional) can be passed and the result can be used in any context.
+ */
+#define X86_MODRM(mod, reg, rm) \
+ ((unsigned char)((mod) << 6 | (reg) << 3 | (rm)))
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/tools/mkbindist.bat b/tools/mkbindist.bat
new file mode 100644
index 0000000..49f5bc8
--- /dev/null
+++ b/tools/mkbindist.bat
@@ -0,0 +1,28 @@
+:: This file is dedicated to the public domain.
+@echo off
+
+:: Builds the project via compile.bat, then packs hl2.wrap.exe and the licence
+:: file into release\threadfix-vMAJOR.MINOR-win32.zip.
+:: NOTE: requires 7-zip, either in the default installation dir or %SEVENZIP%
+
+call compile.bat || exit /B
+if not exist release\ md release
+if "%SEVENZIP%"=="" set SEVENZIP=C:\Program Files\7-Zip\7z.exe
+:: Obtain the version numbers by running the two macro names through the clang
+:: preprocessor with src\version.h force-included. findstr drops the lines that
+:: contain a hash sign, so the first remaining line is taken as the major
+:: version and the last one as the minor version. Delayed expansion is needed
+:: so that major is re-read on each iteration of the loop.
+setlocal EnableDelayedExpansion
+for /F "tokens=* usebackq" %%x IN (`^(echo VERSION_MAJOR ^& echo VERSION_MINOR^) ^| ^
+ clang -x c -E -include src\version.h - ^| findstr /v #`) do (
+ :: dumb but works:
+ if "!major!"=="" set major=%%x
+ set minor=%%x
+)
+setlocal DisableDelayedExpansion
+set name=threadfix-v%major%.%minor%-win32
+:: Stage the files in a temporary directory; md fails, and the script stops, if
+:: that directory already exists.
+md TEMP-%name% || exit /B
+copy hl2.wrap.exe TEMP-%name%\hl2.wrap.exe || exit /B
+copy dist\LICENCE-threadfix TEMP-%name%\LICENCE-threadfix || exit /B
+:: using midnight on release day to make zip deterministic! change on next release!
+powershell (Get-Item TEMP-%name%\hl2.wrap.exe).LastWriteTime = new-object DateTime 2024, 2, 26, 0, 0, 0
+powershell (Get-Item TEMP-%name%\LICENCE-threadfix).LastWriteTime = new-object DateTime 2024, 2, 26, 0, 0, 0
+:: Create the archive from inside the staging directory so the zip holds bare
+:: file names, then move it into release\ and remove the staging directory.
+:: NOTE(review): -mtc=off presumably keeps creation times out of the archive,
+:: again for determinism - confirm against the 7-zip documentation.
+pushd TEMP-%name%
+"%SEVENZIP%" a -mtc=off %name%.zip hl2.wrap.exe LICENCE-threadfix || exit /B
+move %name%.zip ..\release\%name%.zip
+popd
+rd /s /q TEMP-%name%\ || exit /B