diff options
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | LICENCE | 18 | ||||
-rw-r--r-- | README | 48 | ||||
-rw-r--r-- | TODO.txt | 3 | ||||
-rw-r--r-- | compile.bat | 13 | ||||
-rw-r--r-- | dist/LICENCE-threadfix | 19 | ||||
-rw-r--r-- | src/exe.rc | 33 | ||||
-rw-r--r-- | src/fakeiat.h | 45 | ||||
-rw-r--r-- | src/injected.c | 77 | ||||
-rw-r--r-- | src/intdefs.h | 25 | ||||
-rw-r--r-- | src/version.h | 3 | ||||
-rw-r--r-- | src/wincrt.c | 37 | ||||
-rw-r--r-- | src/wrap.c | 145 | ||||
-rw-r--r-- | src/x86.c | 96 | ||||
-rw-r--r-- | src/x86.h | 565 | ||||
-rw-r--r-- | tools/mkbindist.bat | 28 |
16 files changed, 1160 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..62b9d63 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/*.wrap.exe +/junk/ +/compile_commands.json +/compile_flags.txt +/release/ @@ -0,0 +1,18 @@ +Except where otherwise noted, the following terms apply: +════════════════════════════════════════════════════════════════════════════════ +Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com> + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. +════════════════════════════════════════════════════════════════════════════════ +Parts of this software distribution are released into the public domain. Check +the copyright notices in individual files for full details. @@ -0,0 +1,48 @@ +══════ Source Thread Fix ══════ + +This builds a wrapper executable to fix a bug present in old versions of various +Source games wherein having too many CPU threads available will cause a crash. + +It supports Windows only; I don’t know whether this is also an issue on any +Linux game build but I’ll cross that bridge if anyone ever reports a problem. +This tool was originally created because some Left 4 Dead 2 runners have +high-end Ryzens and such with very high core counts, and were essentially unable +to play without a fix. + +NOTE: Please read and understand LICENCE before redistributing this software! 
+ +════ Compiling ════ + +• Install the Windows 10 SDK and MSVC toolset via the Visual Studio Installer + (if you don’t care what else gets installed, installing the C++ Desktop + workload should be fine). +• Install native Clang from https://clang.llvm.org (NOT MinGW/MSYS2 Clang!). +• Run compile.bat. + +You might be able to use MSVC instead of Clang for this but I didn’t bother +trying to find out. + +════ Usage ════ + +The default build output is hl2.wrap.exe, which will wrap a neighbouring +hl2.exe. In the likely event that your game binary has a different name, just +match that; for example call it left4dead2.wrap.exe to wrap Left 4 Dead 2. + +Then, in whatever script or shortcut you use to invoke your game binary, simply +invoke the wrapper instead. It will automatically find the original binary +beside itself and start it up with the necessary hook in place to fix the bug. + +IMPORTANT: The wrapper forcibly passes -insecure to every game it wraps, to +reduce the risk of a VAC ban. Even then, do NOT join any servers that were not +also started in insecure mode, or (particularly with older game versions) you +might still be at risk of getting banned even with -insecure set on your end. +YOU HAVE BEEN WARNED! + +Lastly, note that if you don’t have more than 24 threads, this thing won’t +exactly do anything useful, so if you’re bundling this into an old version of +a game for speedrunning or other purposes, consider having the launch script +first check the core count and bypass this if it’s unnecessary. It doesn’t +really matter a huge deal, but you might as well. + +Thanks, and have fun! +- Michael Smith <mikesmiffy128@gmail.com> diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..62e0471 --- /dev/null +++ b/TODO.txt @@ -0,0 +1,3 @@ +* -DWIN32_LEAN_AND_MEAN -DNOMINMAX -DWINCRT_NO_FLTUSED, remove #defines
+* move into sst, reuse intdefs and x86, add #ifndef WINCRT_NO_FLTUSED to wincrt.c
+* figure out distribution???
diff --git a/compile.bat b/compile.bat new file mode 100644 index 0000000..3194e14 --- /dev/null +++ b/compile.bat @@ -0,0 +1,13 @@ +:: This file is dedicated to the public domain.
+@echo off
+
+llvm-rc /nologo /r /fo tmp.res src/exe.rc
+clang-cl -m32 -mno-sse -fuse-ld=lld -flto -O1 -GR- -GS- -Gs9999999 -EHa- -Oi ^
+-Fehl2.wrap.exe -W3 -Wpedantic -DWIN32_LEAN_AND_MEAN -DNOMINMAX ^
+src/injected.c src/wincrt.c src/wrap.c src/x86.c ^
+-link -nodefaultlib -subsystem:windows,6.0 -stack:0x10000,0x10000 -fixed:no ^
+-dynamicbase -Brepro ^
+kernel32.lib user32.lib tmp.res
+del tmp.res & exit /b %errorlevel%
+
+:: vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/dist/LICENCE-threadfix b/dist/LICENCE-threadfix new file mode 100644 index 0000000..0de6541 --- /dev/null +++ b/dist/LICENCE-threadfix @@ -0,0 +1,19 @@ +Source Thread Fix is released under the following copyright licence:
+════════════════════════════════════════════════════════════════════════════════
+Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+════════════════════════════════════════════════════════════════════════════════
+Please respect these terms when distributing copies of the game wrapper
+executable — doing so is as simple as keeping this LICENCE-threadfix file in
+place. Thanks, and have fun! :^)
diff --git a/src/exe.rc b/src/exe.rc new file mode 100644 index 0000000..e826184 --- /dev/null +++ b/src/exe.rc @@ -0,0 +1,33 @@ +/* This file is dedicated to the public domain. */ + +#include "version.h" + +#define EN_GB 0x809 + +1 VERSIONINFO +FILEVERSION VERSION_MAJOR,VERSION_MINOR,0,0 +PRODUCTVERSION VERSION_MAJOR,VERSION_MINOR,0,0 +FILEFLAGSMASK 0x17L +FILEFLAGS 0 +FILEOS 4 + +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "080904b0" + BEGIN + VALUE "FileDescription", "High thread count fix wrapper for Source" + VALUE "FileVersion", VERSION + VALUE "InternalName", "Source Thread Fix" + VALUE "LegalCopyright", "Michael Smith 2024; ISC licence" + VALUE "ProductName", "Source Thread Fix" + VALUE "ProductVersion", VERSION + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", EN_GB, 1200 + END +END + +// vi: sw=4 ts=4 noet tw=80 cc=80 diff --git a/src/fakeiat.h b/src/fakeiat.h new file mode 100644 index 0000000..3e51381 --- /dev/null +++ b/src/fakeiat.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef INC_FAKEIAT_H +#define INC_FAKEIAT_H + +struct _SYSTEM_INFO; + +typedef int (*__stdcall _iat_FlushInstructionCache_func)(void *, const void *, + unsigned long); +typedef void (*__stdcall _iat_GetSystemInfo_func)(struct _SYSTEM_INFO *); +typedef int (*__stdcall _iat_VirtualProtect_func)(void *, unsigned long, + unsigned long, unsigned long *); + +// Because this is one EXE (not a DLL), injecting it with LoadLibrary doesn't +// fill out the IAT properly, causing crashes when API functions are used. We +// _could_ just manually populate/fix up the IAT, but that's kind of a pain in +// the arse. Instead, we use this poor-man's IAT to pass down literally three +// functions that are used inside of the child process' address space. +extern struct _fakeiat { + _iat_FlushInstructionCache_func FlushInstructionCache; + _iat_GetSystemInfo_func GetSystemInfo; + _iat_VirtualProtect_func VirtualProtect; +} IAT; + +#ifdef FAKEIAT_DEFINES +#define FlushInstructionCache (IAT.FlushInstructionCache) +#define GetSystemInfo (IAT.GetSystemInfo) +#define VirtualProtect (IAT.VirtualProtect) +#endif + +#endif diff --git a/src/injected.c b/src/injected.c new file mode 100644 index 0000000..bd69474 --- /dev/null +++ b/src/injected.c @@ -0,0 +1,77 @@ +/* + * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdbool.h> +#include <Windows.h> + +#define FAKEIAT_DEFINES +#include "fakeiat.h" +#include "intdefs.h" +#include "x86.h" + +// The stuff in this file gets called in the address space of the child process! +// Because we're an EXE, our IAT doesn't get filled properly, so in order to +// call kernel32 functions without crashing, make sure they're in the fake IAT. +// See fakeiat.h and wrap.c. + +__declspec(align(4096)) +static uchar trampoline[4096]; // has to be a whole page, obviously + +// simplified version of the usual hook.c code since there's only a single +// function to hook and no need to unhook +static bool hook(void *func_, void *target) { + ulong old; + if (!VirtualProtect(trampoline, sizeof(trampoline), + PAGE_EXECUTE_READWRITE, &old)) { + return false; + } + uchar *func = func_; + while (*func == X86_JMPIW) func += *(int *)(func + 1) + 5; + if (!VirtualProtect(func, 5, PAGE_EXECUTE_READWRITE, &old)) return false; + int len = 0; + for (;;) { + if (func[len] == X86_CALL) return false; + int ilen = x86_len(func + len); + if (ilen == -1) return false; + len += ilen; + if (len >= 5) break; + if (func[len] == X86_JMPIW) return false; + } + memcpy(trampoline, func, len); + trampoline[len] = X86_JMPIW; + uint diff = func - (trampoline + 5); // goto the continuation + memcpy(trampoline + len + 1, &diff, 4); + diff = (uchar *)target - (func + 5); // goto the hook target + func[0] = X86_JMPIW; + memcpy(func + 1, &diff, 4); + // -1 is the current process, and it's a constant in the WDK, so it's + // assumed we can safely avoid the useless GetCurrentProcess call + FlushInstructionCache((void *)-1, func, 5); + return 
true; +} + +typedef void (*__stdcall GetSystemInfo_func)(SYSTEM_INFO *info); +#define orig_GetSystemInfo ((GetSystemInfo_func)trampoline) +static void __stdcall hook_GetSystemInfo(SYSTEM_INFO *info) { + orig_GetSystemInfo(info); + // Here's where the magic happens! NOTE: the actual limit is a bit higher + // than this, but there's probably not much reason to go over this either. + if (info->dwNumberOfProcessors > 24) info->dwNumberOfProcessors = 24; +} + +__declspec(noinline) int __stdcall injectedentry(int unused) { + return hook((void *)GetSystemInfo, (void *)&hook_GetSystemInfo); +} diff --git a/src/intdefs.h b/src/intdefs.h new file mode 100644 index 0000000..97c6f82 --- /dev/null +++ b/src/intdefs.h @@ -0,0 +1,25 @@ +/* This file is dedicated to the public domain. */ + +#ifndef INC_INTDEFS_H +#define INC_INTDEFS_H + +typedef signed char schar; +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef long long vlong; +typedef unsigned long long uvlong; + +typedef schar s8; +typedef uchar u8; +typedef short s16; +typedef ushort u16; +typedef int s32; +typedef uint u32; +typedef vlong s64; +typedef uvlong u64; + +#endif + +// vi: sw=4 ts=4 noet tw=80 cc=80 diff --git a/src/version.h b/src/version.h new file mode 100644 index 0000000..89d1553 --- /dev/null +++ b/src/version.h @@ -0,0 +1,3 @@ +#define VERSION "1.0" +#define VERSION_MAJOR 1 +#define VERSION_MINOR 0 diff --git a/src/wincrt.c b/src/wincrt.c new file mode 100644 index 0000000..ae24045 --- /dev/null +++ b/src/wincrt.c @@ -0,0 +1,37 @@ +/* This file is dedicated to the public domain. */ + +// TODO(opt): this feels like a sad implementation, can we do marginally better? 
+int memcmp(const void *x_, const void *y_, unsigned int sz) {
+	const unsigned char *x = x_, *y = y_; // C says: compare as unsigned char
+	for (unsigned int i = 0; i < sz; ++i) {
+		if (x[i] > y[i]) return 1;
+		if (x[i] < y[i]) return -1;
+	}
+	return 0;
+}
+
+void *memcpy(void *restrict x, const void *restrict y, unsigned int sz) {
+#ifdef __clang__
+	void *d = x; __asm__ volatile ( // movsb advances EDI: keep x to return
+		"rep movsb\n" :
+		"=D" (d), "=S" (y), "=c" (sz) :
+		"0" (d), "1" (y), "2" (sz) :
+		"memory"
+	);
+#else // terrible fallback just in case someone wants to use this with MSVC
+	char *restrict xb = x; const char *restrict yb = y;
+	for (unsigned int i = 0; i < sz; ++i) xb[i] = yb[i];
+#endif
+	return x;
+}
+
+// this was briefly needed at some point in debugging but seems to be gone again
+// (hence crappy impl). if compiler starts calling memset with opts on, we
+// should use a proper rep stosb impl as well
+//void *memset(void *x, int c, unsigned int n) {
+//	char *xb = x;
+//	for (; n; ++xb, --n) *xb = c;
+//	return x;
+//}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/wrap.c b/src/wrap.c
new file mode 100644
index 0000000..f856cb2
--- /dev/null
+++ b/src/wrap.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <Windows.h>
+
+#include "fakeiat.h"
+#include "intdefs.h"
+
+extern struct HINSTANCE__ __ImageBase;
+
+struct _fakeiat IAT;
+
+static int len(const ushort *s) {
+	int i = 0;
+	for (; *s; ++s) ++i;
+	return i;
+}
+
+static _Noreturn void die(int status, const ushort *message) {
+	MessageBoxW(0, message, L"Thread fix wrapper error", 0);
+	ExitProcess(status);
+}
+
+__declspec(noinline) int __stdcall injectedentry(int unused); // injected.c
+
+static void *rpc(void *proc, void *rfunc, void *rparam, const ushort *errstr) {
+	void *rthread = CreateRemoteThread(proc, 0, 32768,
+			(LPTHREAD_START_ROUTINE)rfunc, rparam, 0, 0);
+	if (!rthread) {
+		TerminateProcess(proc, -1);
+		die(100, errstr);
+	}
+	WaitForSingleObject(rthread, INFINITE);
+	void *ret;
+	GetExitCodeThread(rthread, (ulong *)&ret);
+	return ret;
+}
+
+// main EXE entry point. this seems not to get called when we're LoadLibrary'd!
+_Noreturn void __stdcall WinMainCRTStartup(void) {
+	ushort name[MAX_PATH], origname[MAX_PATH];
+	ushort cmdline[32768]; // Windows limit: 32767 characters plus the null
+	ushort *myargs = GetCommandLineW();
+	bool quote = false, oddslash = false;
+	for (;; ++myargs) {
+		if (*myargs == '\0') break; // bare program name, no args: that's fine
+		if (*myargs == '\\') {
+			oddslash = !oddslash;
+		}
+		else {
+			if (*myargs == '"') { if (!oddslash) quote = !quote; }
+			else if ((*myargs == ' ' || *myargs == '\t') && !quote) break;
+			oddslash = false;
+		}
+	}
+	while (*myargs == ' ' || *myargs == '\t') ++myargs; // never skips the null
+	if (len(myargs) > 32767 - MAX_PATH - sizeof("\"\"-steam -insecure ") - 1) {
+		die(1, L"Command line is too long");
+	}
+	int namelen = GetModuleFileNameW(0, name, MAX_PATH);
+	if (namelen < sizeof("x.wrap.exe") - 1 ||
+			memcmp(name + namelen - 9, L".wrap.exe", 9 * sizeof(*name))) {
+		die(2, L"Wrapper name must end in .wrap.exe");
+	}
+	cmdline[0] = L'"';
+	int i = 0;
+	for (; i < namelen - 9; ++i) {
+		origname[i] = name[i];
+		cmdline[i + 1] = name[i]; // XXX: assuming no quotes etc. prolly fine?
+	}
+	memcpy(origname + i, L".exe", 5 * sizeof(*origname)); // 5: copy null too
+	memcpy(cmdline + i + 1, L".exe\" -steam -insecure ", 23 * sizeof(*cmdline));
+	const ushort *p = myargs; ushort *q = cmdline + i + 24;
+	do *q++ = *p; while (*p++);
+	PROCESS_INFORMATION info;
+	STARTUPINFOW startinfo = {.cb = sizeof(startinfo)};
+	if (!CreateProcessW(origname, cmdline, 0, 0, 0, CREATE_SUSPENDED, 0, 0,
+			&startinfo, &info)) {
+		die(100, L"Couldn't start subprocess");
+	}
+	// avoid any possible thunky weirdness using GPA rather than &LoadLibraryW
+	void *k32 = GetModuleHandleW(L"kernel32.dll");
+	if (!k32) die(100, L"Couldn't get kernel32 module; everything is on fire!");
+	void *lladdr = (void *)GetProcAddress(k32, "LoadLibraryW");
+	int namebytes = (namelen + 1) * sizeof(*name);
+	int rsize = sizeof("ThreadFixEntryPoint");
+	if (namebytes > rsize) rsize = namebytes;
+	void *rmem = VirtualAllocEx(info.hProcess, 0, rsize,
+			MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+	if (!rmem) {
+		TerminateProcess(info.hProcess, -1);
+		die(100, L"Couldn't allocate memory in subprocess");
+	}
+	WriteProcessMemory(info.hProcess, rmem, name, namebytes, 0);
+	void *rdll = rpc(info.hProcess, lladdr, rmem,
+			L"Couldn't call LoadLibrary in subprocess");
+	if (!rdll) {
+		TerminateProcess(info.hProcess, -1);
+		die(100, L"LoadLibrary call in subprocess returned an error");
+	}
+	// injectedentry will be at the same offset, just a different base
+	void *rfunc = (char *)rdll + ((char *)&injectedentry - (char *)&__ImageBase);
+	VirtualFreeEx(info.hProcess, rmem, 0, MEM_RELEASE); // size must be 0 here
+	// Fill out the "fake IAT" table and use WPM to copy it to the injected side
+	// of things. See fakeiat.h for more exposition.
+#define PUTIAT(f) IAT.f = (_iat_##f##_func)GetProcAddress(k32, #f)
+	PUTIAT(GetSystemInfo);
+	PUTIAT(FlushInstructionCache);
+	PUTIAT(VirtualProtect);
+#undef PUTIAT
+	IAT.GetSystemInfo = (_iat_GetSystemInfo_func)GetProcAddress(
+			k32, "GetSystemInfo");
+	IAT.FlushInstructionCache = (_iat_FlushInstructionCache_func)GetProcAddress(
+			k32, "FlushInstructionCache");
+	IAT.VirtualProtect = (_iat_VirtualProtect_func)GetProcAddress(
+			k32, "VirtualProtect");
+	IAT.FlushInstructionCache = &FlushInstructionCache;
+	IAT.VirtualProtect = &VirtualProtect;
+	void *riat = (char *)rdll + ((char *)&IAT - (char *)&__ImageBase);
+	WriteProcessMemory(info.hProcess, riat, &IAT, sizeof(IAT), 0);
+	if (!rpc(info.hProcess, rfunc, 0,
+			L"Couldn't call injected entry point in subprocess")) {
+		die(100, L"Injected code failed to hook GetSystemInfo");
+	}
+	ResumeThread(info.hThread);
+	CloseHandle(info.hThread);
+	WaitForSingleObject(info.hProcess, INFINITE);
+	ulong status;
+	GetExitCodeProcess(info.hProcess, &status);
+	ExitProcess(status);
+}
diff --git a/src/x86.c b/src/x86.c
new file mode 100644
index 0000000..7a5d00e
--- /dev/null
+++ b/src/x86.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */ + +#include "intdefs.h" +#include "x86.h" + +static int mrmsib(const uchar *p, int addrlen) { + // I won't lie: I thought I almost understood this, but after Bill walked me + // through correcting a bunch of wrong cases I now realise that I don't + // really understand it at all. If it helps, I used this as a reference: + // https://github.com/Nomade040/length-disassembler/blob/e8b34546/ldisasm.cpp#L14 + // But it's confusingly-written enough that the code I wrote before didn't + // work, so with any luck nobody will need to refer to it again and this is + // actually correct now. Fingers crossed. + if (addrlen == 4 || *p & 0xC0) { + int sib = addrlen == 4 && *p < 0xC0 && (*p & 7) == 4; + switch (*p & 0xC0) { + // disp8 + case 0x40: return 2 + sib; + // disp16/32 + case 0: + if ((*p & 7) != 5) { + // disp8/32 via SIB + if (sib && (p[1] & 7) == 5) return *p & 0x40 ? 3 : 6; + return 1 + sib; + } + case 0x80: return 1 + addrlen + sib; + } + } + if (addrlen == 2 && *p == 0x26) return 3; + return 1; // note: include the mrm itself in the byte count +} + +int x86_len(const void *insn_) { +#define CASES(name, _) case name: + const uchar *insn = insn_; + int pfxlen = 0, addrlen = 4, operandlen = 4; + +p: switch (*insn) { + case X86_PFX_ADSZ: addrlen = 2; goto P; // bit dumb sorry + case X86_PFX_OPSZ: operandlen = 2; +P: X86_SEG_PREFIXES(CASES) + case X86_PFX_LOCK: case X86_PFX_REPN: case X86_PFX_REP: + // instruction can only be 15 bytes. 
this could go over, oh well, + // just don't want to loop for 8 million years + if (++pfxlen == 14) return -1; + ++insn; + goto p; + } + + switch (*insn) { + X86_OPS_1BYTE_NO(CASES) return pfxlen + 1; + X86_OPS_1BYTE_I8(CASES) operandlen = 1; + X86_OPS_1BYTE_IW(CASES) return pfxlen + 1 + operandlen; + X86_OPS_1BYTE_I16(CASES) return pfxlen + 3; + X86_OPS_1BYTE_MRM(CASES) return pfxlen + 1 + mrmsib(insn + 1, addrlen); + X86_OPS_1BYTE_MRM_I8(CASES) operandlen = 1; + X86_OPS_1BYTE_MRM_IW(CASES) + return pfxlen + 1 + operandlen + mrmsib(insn + 1, addrlen); + case X86_ENTER: return pfxlen + 4; + case X86_CRAZY8: operandlen = 1; + case X86_CRAZYW: + if ((insn[1] & 0x38) >= 0x10) operandlen = 0; + return pfxlen + 1 + operandlen + mrmsib(insn + 1, addrlen); + case X86_2BYTE: ++insn; goto b2; + } + return -1; + +b2: switch (*insn) { + // we don't support any 3 byte ops for now, implement if ever needed... + case X86_3BYTE1: case X86_3BYTE2: case X86_3DNOW: return -1; + X86_OPS_2BYTE_NO(CASES) return pfxlen + 2; + X86_OPS_2BYTE_IW(CASES) return pfxlen + 2 + operandlen; + X86_OPS_2BYTE_MRM(CASES) return pfxlen + 2 + mrmsib(insn + 1, addrlen); + X86_OPS_2BYTE_MRM_I8(CASES) operandlen = 1; + return pfxlen + 2 + operandlen + mrmsib(insn + 1, addrlen); + } + + return -1; +#undef CASES +} + +// vi: sw=4 ts=4 noet tw=80 cc=80 diff --git a/src/x86.h b/src/x86.h new file mode 100644 index 0000000..197d4c3 --- /dev/null +++ b/src/x86.h @@ -0,0 +1,565 @@ +/* + * Copyright © 2023 Michael Smith <mikesmiffy128@gmail.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef INC_X86_H +#define INC_X86_H + +/* + * Opcode-based X86 instruction analysis. In other words, *NOT* a disassembler. + * Only cares about the instructions we expect to see in basic 32-bit userspace + * functions; there's no kernel-mode instructions, no SSE 3+, no AVX, no REX, + * EVEX, yadda yadda. + */ + +// XXX: no BOUND (0x62): ambiguous with EVEX prefix - can't be arsed! + +/* Instruction prefixes: segments */ +#define X86_SEG_PREFIXES(X) \ + X(X86_PFX_ES, 0x26) \ + X(X86_PFX_CS, 0x2E) \ + X(X86_PFX_SS, 0x36) \ + X(X86_PFX_DS, 0x3E) \ + X(X86_PFX_FS, 0x64) \ + X(X86_PFX_GS, 0x65) + +/* Instruction prefixes: operations */ +#define X86_OP_PREFIXES(X) \ + X(X86_PFX_OPSZ, 0x66) \ + X(X86_PFX_ADSZ, 0x67) \ + X(X86_PFX_LOCK, 0xF0) \ + X(X86_PFX_REPN, 0xF2) \ + X(X86_PFX_REP, 0xF3) + +/* All instruction prefixes */ +#define X86_PREFIXES(X) X86_SEG_PREFIXES(X) X86_OP_PREFIXES(X) + +/* Single-byte opcodes with no operands */ +#define X86_OPS_1BYTE_NO(X) \ + X(X86_PUSHES, 0x06) \ + X(X86_POPES, 0x07) \ + X(X86_PUSHCS, 0x0E) \ + X(X86_PUSHSS, 0x16) \ + X(X86_POPSS, 0x17) \ + X(X86_PUSHDS, 0x1E) \ + X(X86_POPDS, 0x1F) \ + X(X86_DAA, 0x27) \ + X(X86_DAS, 0x2F) \ + X(X86_AAA, 0x37) \ + X(X86_AAS, 0x3F) \ + X(X86_INCEAX, 0x40) \ + X(X86_INCECX, 0x41) \ + X(X86_INCEDX, 0x42) \ + X(X86_INCEBX, 0x43) \ + X(X86_INCESP, 0x44) \ + X(X86_INCEBP, 0x45) \ + X(X86_INCESI, 0x46) \ + X(X86_INCEDI, 0x47) \ + X(X86_DECEAX, 0x48) \ + X(X86_DECECX, 0x49) \ + X(X86_DECEDX, 0x4A) \ + X(X86_DECEBX, 0x4B) \ + X(X86_DECESP, 0x4C) \ + X(X86_DECEBP, 0x4D) \ + X(X86_DECESI, 0x4E) \ + X(X86_DECEDI, 0x4F) \ + X(X86_PUSHEAX, 0x50) \ + X(X86_PUSHECX, 0x51) \ + 
X(X86_PUSHEDX, 0x52) \
+	X(X86_PUSHEBX, 0x53) \
+	X(X86_PUSHESP, 0x54) \
+	X(X86_PUSHEBP, 0x55) \
+	X(X86_PUSHESI, 0x56) \
+	X(X86_PUSHEDI, 0x57) \
+	X(X86_POPEAX, 0x58) \
+	X(X86_POPECX, 0x59) \
+	X(X86_POPEDX, 0x5A) \
+	X(X86_POPEBX, 0x5B) \
+	X(X86_POPESP, 0x5C) \
+	X(X86_POPEBP, 0x5D) \
+	X(X86_POPESI, 0x5E) \
+	X(X86_POPEDI, 0x5F) \
+	X(X86_PUSHA, 0x60) \
+	X(X86_POPA, 0x61) \
+	X(X86_NOP, 0x90) \
+	X(X86_XCHGECXEAX, 0x91) \
+	X(X86_XCHGEDXEAX, 0x92) \
+	X(X86_XCHGEBXEAX, 0x93) \
+	X(X86_XCHGESPEAX, 0x94) \
+	X(X86_XCHGEBPEAX, 0x95) \
+	X(X86_XCHGESIEAX, 0x96) \
+	X(X86_XCHGEDIEAX, 0x97) \
+	X(X86_CWDE, 0x98) \
+	X(X86_CDQ, 0x99) \
+	X(X86_WAIT, 0x9B) \
+	X(X86_PUSHF, 0x9C) \
+	X(X86_POPF, 0x9D) \
+	X(X86_SAHF, 0x9E) \
+	X(X86_LAHF, 0x9F) \
+	X(X86_MOVS8, 0xA4) \
+	X(X86_MOVSW, 0xA5) \
+	X(X86_CMPS8, 0xA6) \
+	X(X86_CMPSW, 0xA7) \
+	X(X86_STOS8, 0xAA) \
+	X(X86_STOSD, 0xAB) \
+	X(X86_LODS8, 0xAC) \
+	X(X86_LODSD, 0xAD) \
+	X(X86_SCAS8, 0xAE) \
+	X(X86_SCASD, 0xAF) \
+	X(X86_RET, 0xC3) \
+	X(X86_LEAVE, 0xC9) \
+	X(X86_RETF, 0xCB) \
+	X(X86_INT3, 0xCC) \
+	X(X86_INTO, 0xCE) \
+	X(X86_XLAT, 0xD7) \
+	X(X86_CMC, 0xF5) \
+	X(X86_CLC, 0xF8) \
+	X(X86_STC, 0xF9) \
+	X(X86_CLI, 0xFA) \
+	X(X86_STI, 0xFB) \
+	X(X86_CLD, 0xFC) \
+	X(X86_STD, 0xFD)
+
+/* Single-byte opcodes with a 1-byte immediate operand */
+#define X86_OPS_1BYTE_I8(X) \
+	X(X86_ADDALI, 0x04) \
+	X(X86_ORALI, 0x0C) \
+	X(X86_ADCALI, 0x14) \
+	X(X86_SBBALI, 0x1C) \
+	X(X86_ANDALI, 0x24) \
+	X(X86_SUBALI, 0x2C) \
+	X(X86_XORALI, 0x34) \
+	X(X86_CMPALI, 0x3C) \
+	X(X86_PUSHI8, 0x6A) \
+	X(X86_TESTALI, 0xA8) \
+	X(X86_JO, 0x70) \
+	X(X86_JNO, 0x71) \
+	X(X86_JB, 0x72) /* AKA JC */ \
+	X(X86_JNB, 0x73) /* AKA JNC */ \
+	X(X86_JZ, 0x74) /* AKA JE */ \
+	X(X86_JNZ, 0x75) /* AKA JNE */ \
+	X(X86_JNA, 0x76) /* AKA JBE */ \
+	X(X86_JA, 0x77) /* AKA JNBE */ \
+	X(X86_JS, 0x78) \
+	X(X86_JNS, 0x79) \
+	X(X86_JP, 0x7A) \
+	X(X86_JNP, 0x7B) \
+	X(X86_JL, 0x7C) /* AKA JNGE */ \
+	X(X86_JNL, 0x7D) /* AKA JGE */ \
+	X(X86_JNG, 0x7E) /* AKA JLE */ \
+	X(X86_JG, 0x7F) /* AKA JNLE */ \
+	X(X86_MOVALI, 0xB0) \
+	X(X86_MOVCLI, 0xB1) \
+	X(X86_MOVDLI, 0xB2) \
+	X(X86_MOVBLI, 0xB3) \
+	X(X86_MOVAHI, 0xB4) \
+	X(X86_MOVCHI, 0xB5) \
+	X(X86_MOVDHI, 0xB6) \
+	X(X86_MOVBHI, 0xB7) \
+	X(X86_INT, 0xCD) \
+	X(X86_AMX, 0xD4) /* Note: D4 0A is referred to as AAM */ \
+	X(X86_ADX, 0xD5) /* Note: D5 0A is referred to as AAD */ \
+	X(X86_LOOPNZ, 0xE0) /* AKA LOOPNE */ \
+	X(X86_LOOPZ, 0xE1) /* AKA LOOPE */ \
+	X(X86_LOOP, 0xE2) \
+	X(X86_JCXZ, 0xE3) \
+	X(X86_JMPI8, 0xEB) /* Short jump: opcode + rel8, 2 bytes total */
+
+/* Single-byte opcodes with a word-sized immediate operand */
+#define X86_OPS_1BYTE_IW(X) \
+	X(X86_ADDEAXI, 0x05) \
+	X(X86_OREAXI, 0x0D) \
+	X(X86_ADCEAXI, 0x15) \
+	X(X86_SBBEAXI, 0x1D) \
+	X(X86_ANDEAXI, 0x25) \
+	X(X86_SUBEAXI, 0x2D) \
+	X(X86_XOREAXI, 0x35) \
+	X(X86_CMPEAXI, 0x3D) \
+	X(X86_PUSHIW, 0x68) \
+	X(X86_MOVALII, 0xA0) /* From offset (indirect); offset is word-sized */ \
+	X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \
+	X(X86_MOVIIAL, 0xA2) /* To offset (indirect); offset is word-sized */ \
+	X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \
+	X(X86_TESTEAXI, 0xA9) \
+	X(X86_MOVEAXI, 0xB8) \
+	X(X86_MOVECXI, 0xB9) \
+	X(X86_MOVEDXI, 0xBA) \
+	X(X86_MOVEBXI, 0xBB) \
+	X(X86_MOVESPI, 0xBC) \
+	X(X86_MOVEBPI, 0xBD) \
+	X(X86_MOVESII, 0xBE) \
+	X(X86_MOVEDII, 0xBF) \
+	X(X86_CALL, 0xE8) \
+	X(X86_JMPIW, 0xE9)
+
+/* Single-byte opcodes with 16-bit immediate operands, regardless of prefixes */
+#define X86_OPS_1BYTE_I16(X) \
+	X(X86_RETI16, 0xC2) \
+	X(X86_RETFI16, 0xCA)
+
+/*
+ * Single-byte opcodes with a ModRM. `MR` in a name means the ModRM is the
+ * destination, `RM` means it's the source.
+ */ +#define X86_OPS_1BYTE_MRM(X) \ + X(X86_ADDMR8, 0x00) \ + X(X86_ADDMRW, 0x01) \ + X(X86_ADDRM8, 0x02) \ + X(X86_ADDRMW, 0x03) \ + X(X86_ORMR8, 0x08) \ + X(X86_ORMRW, 0x09) \ + X(X86_ORRM8, 0x0A) \ + X(X86_ORRMW, 0x0B) \ + X(X86_ADCMR8, 0x10) \ + X(X86_ADCMRW, 0x11) \ + X(X86_ADCRM8, 0x12) \ + X(X86_ADCRMW, 0x13) \ + X(X86_SBBMR8, 0x18) \ + X(X86_SBBMRW, 0x19) \ + X(X86_SBBRM8, 0x1A) \ + X(X86_SBBRMW, 0x1B) \ + X(X86_ANDMR8, 0x20) \ + X(X86_ANDMRW, 0x21) \ + X(X86_ANDRM8, 0x22) \ + X(X86_ANDRMW, 0x23) \ + X(X86_SUBMR8, 0x28) \ + X(X86_SUBMRW, 0x29) \ + X(X86_SUBRM8, 0x2A) \ + X(X86_SUBRMW, 0x2B) \ + X(X86_XORMR8, 0x30) \ + X(X86_XORMRW, 0x31) \ + X(X86_XORRM8, 0x32) \ + X(X86_XORRMW, 0x33) \ + X(X86_CMPMR8, 0x38) \ + X(X86_CMPMRW, 0x39) \ + X(X86_CMPRM8, 0x3A) \ + X(X86_CMPRMW, 0x3B) \ + X(X86_ARPL, 0x63) \ + X(X86_TESTMR8, 0x84) \ + X(X86_TESTMRW, 0x85) \ + X(X86_XCHGMR8, 0x86) \ + X(X86_XCHGMRW, 0x87) \ + X(X86_MOVMR8, 0x88) \ + X(X86_MOVMRW, 0x89) \ + X(X86_MOVRM8, 0x8A) \ + X(X86_MOVRMW, 0x8B) \ + X(X86_MOVMS, 0x8C) /* Load 4 bytes from segment register */ \ + X(X86_LEA, 0x8D) \ + X(X86_MOVSM, 0x8E) /* Store 4 bytes to segment register */ \ + X(X86_POPM, 0x8F) \ + X(X86_LES, 0xC4) \ + X(X86_LDS, 0xC5) \ + X(X86_SHIFTM18, 0xD0) /* Shift/roll by 1 place */ \ + X(X86_SHIFTM1W, 0xD1) /* Shift/roll by 1 place */ \ + X(X86_SHIFTMCL8, 0xD2) /* Shift/roll by CL places */ \ + X(X86_SHIFTMCLW, 0xD3) /* Shift/roll by CL places */ \ + X(X86_FLTBLK1, 0xD8) /* Various float ops (1/8) */ \ + X(X86_FLTBLK2, 0xD9) /* Various float ops (2/8) */ \ + X(X86_FLTBLK3, 0xDA) /* Various float ops (3/8) */ \ + X(X86_FLTBLK4, 0xDB) /* Various float ops (4/8) */ \ + X(X86_FLTBLK5, 0xDC) /* Various float ops (5/8) */ \ + X(X86_FLTBLK6, 0xDD) /* Various float ops (6/8) */ \ + X(X86_FLTBLK7, 0xDE) /* Various float ops (7/8) */ \ + X(X86_FLTBLK8, 0xDF) /* Various float ops (8/8) */ \ + X(X86_MISCM8, 0xFE) /* Only documented for MRM.reg in {0, 1} */ \ + X(X86_MISCMW, 0xFF) + +/* 
Single-byte opcodes with a ModRM and a 1-byte immediate operand */ +#define X86_OPS_1BYTE_MRM_I8(X) \ + X(X86_IMULMI8, 0x6B) /* 3-operand multiply */ \ + X(X86_ALUMI8, 0x80) /* ALU op in MRM.reg, from immediate */ \ + X(X86_ALUMI8X, 0x82) /* ALU op in MRM.reg, from immediate, redundant?? */ \ + X(X86_ALUMI8S, 0x83) /* ALU op in MRM.reg, from immediate, sign-extend */ \ + X(X86_SHIFTMI8, 0xC0) /* Shift/roll by imm8 places */ \ + X(X86_SHIFTMIW, 0xC1) /* Shift/roll by imm8 places */ \ + X(X86_MOVMI8, 0xC6) /* Note: MRM.reg must be 0 */ + +/* Single-byte opcodes with a ModRM and a word-sized immediate operand */ +#define X86_OPS_1BYTE_MRM_IW(X) \ + X(X86_IMULMIW, 0x69) /* 3-operand multiply */ \ + X(X86_ALUMIW, 0x81) /* ALU op in MRM.reg, from immediate */ \ + X(X86_MOVMIW, 0xC7) /* Note: MRM.reg must be 0 */ + +/* All single-byte x86 instructions */ +#define X86_OPS_1BYTE(X) \ + X86_OPS_1BYTE_NO(X) \ + X86_OPS_1BYTE_I8(X) \ + X86_OPS_1BYTE_IW(X) \ + X86_OPS_1BYTE_I16(X) \ + X86_OPS_1BYTE_MRM(X) \ + X86_OPS_1BYTE_MRM_I8(X) \ + X86_OPS_1BYTE_MRM_IW(X) \ + X(X86_ENTER, 0xC8) /* Dumb special case insn: imm16 followed by imm8 */ \ + X(X86_CRAZY8, 0xF6) /* CRAZY reg-encoded block, has imm8 IFF reg < 2 */ \ + X(X86_CRAZYW, 0xF7) /* CRAZY reg-encoded block, has imm32/16 IFF reg < 2 */ + +/* Second bytes of opcodes with no operands */ +#define X86_OPS_2BYTE_NO(X) \ + X(X86_2B_RDTSC, 0x31) \ + X(X86_2B_RDPMD, 0x33) \ + X(X86_2B_SYSENTER, 0x34) \ + X(X86_2B_PUSHFS, 0xA0) \ + X(X86_2B_POPFS, 0xA1) \ + X(X86_2B_CPUID, 0xA2) \ + X(X86_2B_PUSHGS, 0xA8) \ + X(X86_2B_POPGS, 0xA9) \ + X(X86_2B_RSM, 0xAA) \ + X(X86_2B_BSWAPEAX, 0xC8) \ + X(X86_2B_BSWAPECX, 0xC9) \ + X(X86_2B_BSWAPEDX, 0xCA) \ + X(X86_2B_BSWAPEBX, 0xCB) \ + X(X86_2B_BSWAPESP, 0xCC) \ + X(X86_2B_BSWAPEBP, 0xCD) \ + X(X86_2B_BSWAPESI, 0xCE) \ + X(X86_2B_BSWAPEDI, 0xCF) \ + /* MMX instruction */ \ + X(X86_2B_EMMS, 0x77) + +/* Second bytes of opcodes with a word-sized immediate operand */ +#define X86_OPS_2BYTE_IW(X) \ + 
X(X86_2B_JOII, 0x80) /* From offset (indirect) */ \ + X(X86_2B_JNOII, 0x81) /* From offset (indirect) */ \ + X(X86_2B_JBII, 0x82) /* AKA JC; from offset (indirect) */ \ + X(X86_2B_JNBII, 0x83) /* AKA JNC; from offset (indirect) */ \ + X(X86_2B_JZII, 0x84) /* AKA JE; from offset (indirect) */ \ + X(X86_2B_JNZII, 0x85) /* AKA JNE; from offset (indirect) */ \ + X(X86_2B_JNAII, 0x86) /* AKA JBE; from offset (indirect) */ \ + X(X86_2B_JAII, 0x87) /* AKA JNBE; from offset (indirect) */ \ + X(X86_2B_JSII, 0x88) /* From offset (indirect) */ \ + X(X86_2B_JNSII, 0x89) /* From offset (indirect) */ \ + X(X86_2B_JPII, 0x8A) /* From offset (indirect) */ \ + X(X86_2B_JNPII, 0x8B) /* From offset (indirect) */ \ + X(X86_2B_JLII, 0x8C) /* AKA JNGE; from offset (indirect) */ \ + X(X86_2B_JNLII, 0x8D) /* AKA JGE; from offset (indirect) */ \ + X(X86_2B_JNGII, 0x8E) /* AKA JLE; from offset (indirect) */ \ + X(X86_2B_JGII, 0x8F) /* AKA JNLE; from offset (indirect) */ + +/* Second bytes of opcodes with a ModRM */ +#define X86_OPS_2BYTE_MRM(X) \ + X(X86_2B_NOP, 0x0D) /* Variable length NOP (3-9 with prefix) */ \ + X(X86_2B_HINTS1, 0x18) /* Prefetch and hint-nop block 1/8 */ \ + X(X86_2B_HINTS2, 0x19) /* Prefetch and hint-nop block 2/8 */ \ + X(X86_2B_HINTS3, 0x1A) /* Prefetch and hint-nop block 3/8 */ \ + X(X86_2B_HINTS4, 0x1B) /* Prefetch and hint-nop block 4/8 */ \ + X(X86_2B_HINTS5, 0x1C) /* Prefetch and hint-nop block 5/8 */ \ + X(X86_2B_HINTS6, 0x1D) /* Prefetch and hint-nop block 6/8 */ \ + X(X86_2B_HINTS7, 0x1E) /* Prefetch and hint-nop block 7/8 */ \ + X(X86_2B_HINTS8, 0x1F) /* Prefetch and hint-nop block 8/8 */ \ + X(X86_2B_CMOVO, 0x40) \ + X(X86_2B_CMOVNO, 0x41) \ + X(X86_2B_CMOVB, 0x42) /* AKA CMOVC */ \ + X(X86_2B_CMOVNB, 0x43) /* AKA CMOVNC */ \ + X(X86_2B_CMOVZ, 0x44) /* AKA CMOVE */ \ + X(X86_2B_CMOVNZ, 0x45) /* AKA CMOVNE */ \ + X(X86_2B_CMOVNA, 0x46) /* AKA CMOVBE */ \ + X(X86_2B_CMOVA, 0x47) /* AKA CMOVNBE */ \ + X(X86_2B_CMOVS, 0x48) \ + X(X86_2B_CMOVNS, 0x49) \ + 
X(X86_2B_CMOVP, 0x4A) \ + X(X86_2B_CMOVNP, 0x4B) \ + X(X86_2B_CMOVL, 0x4C) /* AKA CMOVNGE */ \ + X(X86_2B_CMOVNL, 0x4D) /* AKA CMOVGE */ \ + X(X86_2B_CMOVNG, 0x4E) /* AKA CMOVLE */ \ + X(X86_2B_CMOVG, 0x4F) /* AKA CMOVNLE */ \ + X(X86_2B_SETO, 0x90) \ + X(X86_2B_SETNO, 0x91) \ + X(X86_2B_SETB, 0x92) /* AKA SETC */ \ + X(X86_2B_SETNB, 0x93) /* AKA SETNC */ \ + X(X86_2B_SETZ, 0x94) /* AKA SETE */ \ + X(X86_2B_SETNZ, 0x95) /* AKA SETNE */ \ + X(X86_2B_SETNA, 0x96) /* AKA SETBE */ \ + X(X86_2B_SETA, 0x97) /* AKA SETNBE */ \ + X(X86_2B_SETS, 0x98) \ + X(X86_2B_SETNS, 0x99) \ + X(X86_2B_SETP, 0x9A) \ + X(X86_2B_SETNP, 0x9B) \ + X(X86_2B_SETL, 0x9C) /* AKA SETNGE */ \ + X(X86_2B_SETNL, 0x9D) /* AKA SETGE */ \ + X(X86_2B_SETNG, 0x9E) /* AKA SETLE */ \ + X(X86_2B_SETG, 0x9F) /* AKA SETNLE */ \ + X(X86_2B_BTMR, 0xA3) \ + X(X86_2B_SHLDMRCL, 0xA5) \ + X(X86_2B_BTS, 0xAB) \ + X(X86_2B_SHRDMRCL, 0xAD) \ + X(X86_2B_MISC, 0xAE) /* Float env stuff, memory fences */ \ + X(X86_2B_IMUL, 0xAF) \ + X(X86_2B_CMPXCHG8, 0xB0) \ + X(X86_2B_CMPXCHGW, 0xB1) \ + X(X86_2B_MOVZX8, 0xB6) \ + X(X86_2B_MOVZXW, 0xB7) \ + X(X86_2B_POPCNT, 0xB8) \ + X(X86_2B_BTCRM, 0xBB) \ + X(X86_2B_BSF, 0xBC) \ + X(X86_2B_BSR, 0xBD) \ + X(X86_2B_MOVSX8, 0xBE) \ + X(X86_2B_MOVSXW, 0xBF) \ + X(X86_2B_XADDRM8, 0xC0) \ + X(X86_2B_XADDRMW, 0xC1) \ + /* NOTE: this one is actually a block with some VMX stuff too; it's only + CMPXCHG64 (CMPXCHG8B if you prefer) if MRM.reg = 1, but naming it this + way seemed more useful since it's what you'll see in normal userspace + programs, which is what we're interested in. 
*/ \ + X(X86_2B_CMPXCHG64, 0xC7) \ + /* -- MMX/SSE1/SSE2 instructions -- */ \ + /* XXX: some of the naming here isn't totally perfect */ \ + X(X86_2B_MOVRM128, 0x10) /* MOVUPS/MOVSS/MOVUPD/MOVSD via prefix */ \ + X(X86_2B_MOVMR128, 0x11) /* MOVUPS/MOVSS/MOVUPD/MOVSD via prefix */ \ + X(X86_2B_MOVLRM, 0x12) /* MOV(H)LPS/MOVLPD/MOVDDUP/MOVSLDUP */ \ + X(X86_2B_MOVLMR, 0x13) /* MOVLP{S,D} */ \ + X(X86_2B_UNPCKL, 0x14) /* UNPCKLP{S,D} */ \ + X(X86_2B_UNPCKH, 0x15) /* UNPCKHPS/UNPCKHPD */ \ + X(X86_2B_MOVHRM, 0x16) /* MOV(L)HPS/MOVHPD/MOVSHDUP */ \ + X(X86_2B_MOVHMR, 0x17) /* MOVHPS/MOVHPD */ \ + X(X86_2B_MOVARM, 0x28) /* MOVAP{S,D} via prefix */ \ + X(X86_2B_MOVAMR, 0x29) /* MOVAP{S,D} */ \ + X(X86_2B_CVTIF64, 0x2A) /* CVTxI2x{S,D} */ \ + X(X86_2B_MOVNT, 0x2B) /* MOVNTP{S,D} */ \ + X(X86_2B_CVTFT64, 0x2C) /* CVTTx{S,D}2xI */ \ + X(X86_2B_CVTFI64, 0x2D) /* CVTx{S,D}2xI */ \ + X(X86_2B_UCOMI, 0x2E) /* UCOMIS{S,D} */ \ + X(X86_2B_COMI, 0x2F) /* COMIS{S,D} */ \ + X(X86_2B_MOVMSK, 0x50) /* MOVMSKP{S,D} */ \ + X(X86_2B_SQRT, 0x51) /* SQRT{P,S}{S,D} */ \ + X(X86_2B_RSQRT, 0x52) /* RSQRT{P,S}S */ \ + X(X86_2B_RCP, 0x53) /* RCP{P,S}S */ \ + X(X86_2B_AND, 0x54) /* ANDP{S,D} */ \ + X(X86_2B_ANDN, 0x55) /* ANDNP{S,D} */ \ + X(X86_2B_OR, 0x56) /* ORP{S,D} */ \ + X(X86_2B_XOR, 0x57) /* XORP{S,D} */ \ + X(X86_2B_ADD, 0x58) /* ADD{P,S}{S,D} */ \ + X(X86_2B_MUL, 0x59) /* MUL{P,S}{S,D} */ \ + X(X86_2B_CVTFF128, 0x5A) /* CVTxS2xD/CVTxD2xS */ \ + X(X86_2B_CVTFI128, 0x5B) /* CVTDQ2PS/CVTPS2DQ/CVTTPS2DQ */ \ + X(X86_2B_SUB, 0x5C) /* SUB{P,S}{S,D} */ \ + X(X86_2B_DIV, 0x5D) /* DIV{P,S}{S,D} */ \ + X(X86_2B_MIN, 0x5E) /* MIN{P,S}{S,D} */ \ + X(X86_2B_MAX, 0x5F) /* MAX{P,S}{S,D} */ \ + X(X86_2B_PUNPCKLBW, 0x60) \ + X(X86_2B_PUNPCKLBD, 0x61) \ + X(X86_2B_PUNPCKLDQ, 0x62) \ + X(X86_2B_PACKSSWB, 0x63) \ + X(X86_2B_PCMPGTB, 0x64) \ + X(X86_2B_PCMPGTW, 0x65) \ + X(X86_2B_PCMPGTD, 0x66) \ + X(X86_2B_PACKUSWB, 0x67) \ + X(X86_2B_PUNPCKHBW, 0x68) \ + X(X86_2B_PUNPCKHWD, 0x69) \ + 
X(X86_2B_PUNPCKHDQ, 0x6A) \ + X(X86_2B_PACKSSDW, 0x6B) \ + X(X86_2B_PUNPCKLQDQ, 0x6C) \ + X(X86_2B_PUNPCKHQDQ, 0x6D) \ + X(X86_2B_MOVDRM, 0x6E) \ + X(X86_2B_MOVQRM, 0x6F) /* MOVQ/MOVDQA/MOVDQU */ \ + X(X86_2B_PCMPEQB, 0x74) \ + X(X86_2B_PCMPEQW, 0x75) \ + X(X86_2B_PCMPEQD, 0x76) \ + X(X86_2B_MOVDMR, 0x7E) \ + X(X86_2B_MOVQMR, 0x7F) \ + X(X86_2B_MOVNTI, 0xC3) \ + X(X86_2B_ADDSUB, 0xD0) /* ADDSUBP{S,D} */ \ + X(X86_2B_PSRLW, 0xD1) \ + X(X86_2B_PSRLD, 0xD2) \ + X(X86_2B_PSRLQ, 0xD3) \ + X(X86_2B_PADDQ, 0xD4) \ + X(X86_2B_PMULLW, 0xD5) \ + X(X86_2B_MOVQRR, 0xD6) /* MOVQ(m,r)/MOVQ2DQ/MOVDQ2Q based on prefix */ \ + X(X86_2B_PMOVMSKB, 0xD7) /* Moves the byte sign-bit mask into a GPR */ \ + X(X86_2B_PSUBUSB, 0xD8) \ + X(X86_2B_PSUBUSW, 0xD9) \ + X(X86_2B_PMINUB, 0xDA) \ + X(X86_2B_PAND, 0xDB) \ + X(X86_2B_PADDUSB, 0xDC) \ + X(X86_2B_PADDUSW, 0xDD) \ + X(X86_2B_PMAXUB, 0xDE) \ + X(X86_2B_PANDN, 0xDF) \ + X(X86_2B_PAVGB, 0xE0) \ + X(X86_2B_PSRAW, 0xE1) \ + X(X86_2B_PSRAD, 0xE2) \ + X(X86_2B_PAVGW, 0xE3) \ + X(X86_2B_PMULHUW, 0xE4) \ + X(X86_2B_PMULHW, 0xE5) \ + X(X86_2B_CVTQ, 0xE6) /* CVTPD2DQ/CVTTPD2DQ/CVTDQ2PD */ \ + X(X86_2B_MOVNTQ, 0xE7) \ + X(X86_2B_PSUBSB, 0xE8) \ + X(X86_2B_PSUBSW, 0xE9) \ + X(X86_2B_PMINSB, 0xEA) \ + X(X86_2B_PMINSW, 0xEB) \ + X(X86_2B_PADDSB, 0xEC) \ + X(X86_2B_PADDSW, 0xED) \ + X(X86_2B_PMAXSW, 0xEE) \ + X(X86_2B_PXOR, 0xEF) \ + X(X86_2B_LDDQU, 0xF0) \ + X(X86_2B_PSLLW, 0xF1) \ + X(X86_2B_PSLLD, 0xF2) \ + X(X86_2B_PSLLQ, 0xF3) \ + X(X86_2B_PMULUDQ, 0xF4) \ + X(X86_2B_PMADDWD, 0xF5) \ + X(X86_2B_PSABDW, 0xF6) \ + X(X86_2B_MASKMOVQ, 0xF7) \ + X(X86_2B_PSUBB, 0xF8) \ + X(X86_2B_PSUBW, 0xF9) \ + X(X86_2B_PSUBD, 0xFA) \ + X(X86_2B_PSUBQ, 0xFB) \ + X(X86_2B_PADDB, 0xFC) \ + X(X86_2B_PADDW, 0xFD) \ + X(X86_2B_PADDD, 0xFE) + +/* Second bytes of opcodes with a ModRM and a 1-byte immediate operand */ +#define X86_OPS_2BYTE_MRM_I8(X) \ + X(X86_2B_SHLDMRI, 0xA4) \ + X(X86_2B_SHRDMRI, 0xAC) \ + X(X86_2B_BTXMI, 0xBA) /* BT/BTS/BTR/BTC depending on MRM.reg (4-7) */ \ + /* -- 
MMX/SSE1/SSE2 instructions -- */ \ + X(X86_2B_PSHUF, 0x70) /* PSHUFW/PSHUFLW/PSHUFHW/PSHUFD via prefix */ \ + X(X86_2B_PSWI, 0x71) /* PSRLW/PSRAW/PSLLW via MRM.reg */ \ + X(X86_2B_PSDI, 0x72) /* PSRLD/PSRAD/PSLLD via MRM.reg */ \ + X(X86_2B_PSQI, 0x73) /* PSRLQ/PSRLDQ/PSLLQ/PSLLDQ via MRM.reg */ \ + X(X86_2B_CMPSI, 0xC2) /* CMP{P,S}{S,D} via prefix */ \ + X(X86_2B_PINSRW, 0xC4) \ + X(X86_2B_PEXTRW, 0xC5) \ + X(X86_2B_SHUF, 0xC6) /* SHUFP{S,D} */ \ + +#define X86_OPS_2BYTE(X) \ + X86_OPS_2BYTE_NO(X) \ + X86_OPS_2BYTE_IW(X) \ + X86_OPS_2BYTE_MRM(X) \ + X86_OPS_2BYTE_MRM_I8(X) + +#define _X86_ENUM(name, value) name = value, +enum { + X86_PREFIXES(_X86_ENUM) + X86_OPS_1BYTE(_X86_ENUM) + X86_2BYTE = 0x0F, /* First byte of a 2- or 3-byte opcode */ + X86_OPS_2BYTE(_X86_ENUM) + X86_3BYTE1 = 0x38, /* One of the two second bytes of a three-byte opcode */ + X86_3BYTE2 = 0x3A, /* The other second byte of a three-byte opcode */ + X86_3DNOW = 0x0F /* The second byte of a three-byte 3DNow! opcode */ +}; +#undef _X86_ENUM + +/* + * Returns the length of an instruction, or -1 if it's a "known unknown" or + * invalid instruction. Doesn't handle unknown unknowns: may explode or hang on + * arbitrary untrusted data. Also doesn't handle, among other things, 3DNow!, + * SSE, MMX, AVX, and such. Aims to be small and fast rather than comprehensive. + */ +int x86_len(const void *insn); + +/* + * Constructs a ModRM byte, assuming the parameters are all in range. Every + * argument is parenthesized so that arbitrary expressions can be passed. + */ +#define X86_MODRM(mod, reg, rm) (unsigned char)((mod) << 6 | (reg) << 3 | (rm)) + +#endif + +// vi: sw=4 ts=4 noet tw=80 cc=80 diff --git a/tools/mkbindist.bat b/tools/mkbindist.bat new file mode 100644 index 0000000..49f5bc8 --- /dev/null +++ b/tools/mkbindist.bat @@ -0,0 +1,28 @@ +:: This file is dedicated to the public domain.
+@echo off
+
+:: NOTE: requires 7-zip, either in the default installation dir or %SEVENZIP%
+:: Builds hl2.wrap.exe via compile.bat, then packs it together with the licence
+:: file into release\threadfix-v<major>.<minor>-win32.zip.
+
+call compile.bat || exit /B
+if not exist release\ md release
+if "%SEVENZIP%"=="" set SEVENZIP=C:\Program Files\7-Zip\7z.exe
+setlocal EnableDelayedExpansion
+:: clear any values inherited from the calling environment, otherwise the
+:: "first line wins" check in the loop below would keep a stale major version
+set major=
+set minor=
+:: preprocess the two macro names against src\version.h; the two output lines
+:: that aren't # line markers are the major and minor version, in that order
+for /F "tokens=* usebackq" %%x IN (`^(echo VERSION_MAJOR ^& echo VERSION_MINOR^) ^| ^
+ clang -x c -E -include src\version.h - ^| findstr /v #`) do (
+ rem dumb but works: major keeps the first line, minor ends up as the last
+ if "!major!"=="" set major=%%x
+ set minor=%%x
+)
+:: a second setlocal (rather than endlocal) keeps major/minor visible below
+setlocal DisableDelayedExpansion
+set name=threadfix-v%major%.%minor%-win32
+md TEMP-%name% || exit /B
+copy hl2.wrap.exe TEMP-%name%\hl2.wrap.exe || exit /B
+copy dist\LICENCE-threadfix TEMP-%name%\LICENCE-threadfix || exit /B
+:: using midnight on release day to make zip deterministic! change on next release!
+powershell (Get-Item TEMP-%name%\hl2.wrap.exe).LastWriteTime = new-object DateTime 2024, 2, 26, 0, 0, 0
+powershell (Get-Item TEMP-%name%\LICENCE-threadfix).LastWriteTime = new-object DateTime 2024, 2, 26, 0, 0, 0
+pushd TEMP-%name%
+"%SEVENZIP%" a -mtc=off %name%.zip hl2.wrap.exe LICENCE-threadfix || exit /B
+:: bail out if the move fails, so the cleanup below can't delete the only copy
+move %name%.zip ..\release\%name%.zip || exit /B
+popd
+rd /s /q TEMP-%name%\ || exit /B
|