summaryrefslogtreecommitdiffhomepage
path: root/src/x86.c
diff options
context:
space:
mode:
authorMichael Smith <mikesmiffy128@gmail.com>2022-04-24 03:27:35 +0100
committerMichael Smith <mikesmiffy128@gmail.com>2022-04-24 03:43:26 +0100
commit7b12eb811ff62d9d14ccb7c152a9821796efe9a5 (patch)
treede95be73de40e732d8bbd002b721b4683fbf12c0 /src/x86.c
parent99e9a6765a9a358987c062ec4a251f8254581933 (diff)
Replace udis86 with a very small x86 decoder
hook_inline() uses the new x86_len() function to get instruction lengths instead of doing full-blown disassembly, which should be a tiny bit quicker, and also removes the need for about 90KiB of lookup tables and such in the final binary. The code-digging logic in demorecord is also rewritten to be opcode-based rather than mnemonic-based. In general, going forward the plan is to always rely on opcodes and thus avoid a bunch of disassembly work every plugin load. udis86 is still in the tree for now to provide dbg_asmdump(), but it's only compiled into debug builds and left out of releases completely. As such, the whole BSD licence statement is also gone from the distribution LICENCE files. There's now also a dbg_toghidra() which spits out a rebased address to look stuff up for proper reverse engineering, which might be more useful than dbg_asmdump() anyway. If nobody ends up using the latter ever again, udis86 could get chucked completely. We'll see. Also shoehorned into this commit are a couple more forgotten copyright year bumps and some general minor cleanup here and there, because I couldn't be bothered wading through all the diff hunks. Oh, and makebindist.bat now makes an effort to make the zip file timestamps predictable/reproducible. That should be a different commit for sure, but oh well too bad.
Diffstat (limited to 'src/x86.c')
-rw-r--r--src/x86.c87
1 files changed, 87 insertions, 0 deletions
diff --git a/src/x86.c b/src/x86.c
new file mode 100644
index 0000000..e33efbb
--- /dev/null
+++ b/src/x86.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "intdefs.h"
+#include "x86.h"
+
+// Returns how many bytes are taken up by the ModRM byte at p together with
+// any SIB byte and displacement that follow it. The ModRM byte itself is
+// included in the count. addrlen is the effective address size in bytes (2 or
+// 4). p[1] is only read when the ModRM byte says a SIB byte is present.
+static int mrm(const uchar *p, int addrlen) {
+	int mod = *p & 0xC0, rm = *p & 7;
+	// mod 3 is a plain register operand: nothing follows the ModRM byte
+	if (mod == 0xC0) return 1;
+	// a SIB byte only exists with 32-bit addressing, and is selected by rm 4
+	int sib = addrlen == 4 && rm == 4;
+	switch (mod) {
+		case 0x40: return 2 + sib; // disp8
+		case 0x80: return 1 + addrlen + sib; // disp16/32
+	}
+	// mod 0: no displacement, apart from the special encodings below
+	if (addrlen == 4) {
+		if (rm == 5) return 5; // bare disp32 with no base register
+		// a SIB byte whose base field is 5 also means "no base, disp32"
+		if (sib && (p[1] & 7) == 5) return 6;
+		return 1 + sib; // the SIB byte still has to be counted
+	}
+	// 16-bit addressing: rm 6 is a bare disp16, whatever the reg field holds
+	if (addrlen == 2 && rm == 6) return 3;
+	return 1;
+}
+
+// Returns the length in bytes of the x86 instruction starting at insn_,
+// prefixes included, or -1 if it can't be worked out: the opcode isn't in any
+// of the tables below, it's one of X86_3BYTE1/X86_3BYTE2/X86_3DNOW, or there
+// are 14 prefix bytes in a row. Address and operand sizes start out at 4 bytes
+// and drop to 2 when the corresponding size prefix is seen.
+int x86_len(const void *insn_) {
+	// NOTE(review): the X86_* list macros are assumed to call CASES once per
+	// opcode, so that each one expands to a run of case labels - see x86.h
+#define CASES(name, _) case name:
+	const uchar *insn = insn_;
+	int pfxlen = 0, addrlen = 4, operandlen = 4;
+
+	// eat prefixes one at a time, noting any size overrides along the way
+p:	switch (*insn) {
+		case X86_PFX_ADSZ: addrlen = 2; goto P; // bit dumb sorry
+		case X86_PFX_OPSZ: operandlen = 2;
+P:		X86_SEG_PREFIXES(CASES)
+		case X86_PFX_LOCK: case X86_PFX_REPN: case X86_PFX_REP:
+			// instruction can only be 15 bytes. this could go over, oh well,
+			// just don't want to loop for 8 million years
+			if (++pfxlen == 14) return -1;
+			++insn;
+			goto p;
+	}
+
+	// 1-byte opcodes. the I8 cases deliberately fall through into the IW ones
+	// after forcing a 1-byte immediate. mrm() counts the ModRM byte itself, so
+	// only the opcode byte is added on top of it here
+	switch (*insn) {
+		X86_OPS_1BYTE_NO(CASES) return pfxlen + 1;
+		X86_OPS_1BYTE_I8(CASES) operandlen = 1;
+		X86_OPS_1BYTE_IW(CASES) return pfxlen + 1 + operandlen;
+		X86_OPS_1BYTE_I16(CASES) return pfxlen + 3;
+		X86_OPS_1BYTE_MRM(CASES) return pfxlen + 1 + mrm(insn + 1, addrlen);
+		X86_OPS_1BYTE_MRM_I8(CASES) operandlen = 1;
+		X86_OPS_1BYTE_MRM_IW(CASES)
+			return pfxlen + 1 + operandlen + mrm(insn + 1, addrlen);
+		case X86_ENTER: return pfxlen + 4;
+		case X86_CRAZY8: operandlen = 1;
+		case X86_CRAZYW:
+			// whether there's an immediate depends on the ModRM reg field:
+			// values of 2 and up have none. (presumably these are the F6/F7
+			// group, where only TEST takes an immediate - check x86.h)
+			if ((insn[1] & 0x38) >= 0x10) operandlen = 0;
+			return pfxlen + 1 + operandlen + mrm(insn + 1, addrlen);
+		case X86_2BYTE: ++insn; goto b2;
+	}
+	return -1;
+
+	// 2-byte opcodes: insn now points at the second opcode byte, so the
+	// escape byte and the opcode together account for the + 2
+b2:	switch (*insn) {
+		// we don't support any 3 byte ops for now, implement if ever needed...
+		case X86_3BYTE1: case X86_3BYTE2: case X86_3DNOW: return -1;
+		X86_OPS_2BYTE_NO(CASES) return pfxlen + 2;
+		X86_OPS_2BYTE_IW(CASES) return pfxlen + 2 + operandlen;
+		X86_OPS_2BYTE_MRM(CASES) return pfxlen + 2 + mrm(insn + 1, addrlen);
+		X86_OPS_2BYTE_MRM_I8(CASES) operandlen = 1;
+			return pfxlen + 2 + operandlen + mrm(insn + 1, addrlen);
+	}
+
+	return -1;
+#undef CASES
+}
+
+// vi: sw=4 ts=4 noet tw=80 cc=80