summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorMichael Smith <mikesmiffy128@gmail.com>2024-09-07 12:57:38 +0100
committerMichael Smith <mikesmiffy128@gmail.com>2024-09-07 12:57:38 +0100
commit43c64eee8dd08d61d029be5a30c0edc098d282ab (patch)
treea71e412b1fefd3abf89093ca4830a5cf3ba1c46e /src
parent8bb4226f07b1e9ee79f3429a1495eaa694b13334 (diff)
Un-break and re-fix x86
The last fix was, uh, not good. With any luck this is actually correct now. Certainly, running many millions of test cases fails to find any mismatch with udis, so it's at least a lot less wrong than it was.
Diffstat (limited to 'src')
-rw-r--r--src/x86.c4
-rw-r--r--src/x86.h17
2 files changed, 13 insertions, 8 deletions
diff --git a/src/x86.c b/src/x86.c
index e0431d6..5399af8 100644
--- a/src/x86.c
+++ b/src/x86.c
@@ -25,7 +25,6 @@ static int mrmsib(const uchar *p, int addrlen) {
// But it's confusingly-written enough that the code I wrote before didn't
// work, so with any luck nobody will need to refer to it again and this is
// actually correct now. Fingers crossed.
- if ((*p & 0xC6) == 0x06) return 3; // special case for disp16
if (addrlen == 4 || *p & 0xC0) {
int sib = addrlen == 4 && *p < 0xC0 && (*p & 7) == 4;
switch (*p & 0xC0) {
@@ -41,7 +40,7 @@ static int mrmsib(const uchar *p, int addrlen) {
case 0x80: return 1 + addrlen + sib;
}
}
- if (addrlen == 2 && *p == 0x26) return 3;
+ if (addrlen == 2 && (*p & 0xC7) == 0x06) return 3;
return 1; // note: include the mrm itself in the byte count
}
@@ -66,6 +65,7 @@ P: X86_SEG_PREFIXES(CASES)
X86_OPS_1BYTE_NO(CASES) return pfxlen + 1;
X86_OPS_1BYTE_I8(CASES) operandlen = 1;
X86_OPS_1BYTE_IW(CASES) return pfxlen + 1 + operandlen;
+ X86_OPS_1BYTE_IWI(CASES) return pfxlen + 1 + addrlen;
X86_OPS_1BYTE_I16(CASES) return pfxlen + 3;
X86_OPS_1BYTE_MRM(CASES) return pfxlen + 1 + mrmsib(insn + 1, addrlen);
X86_OPS_1BYTE_MRM_I8(CASES) operandlen = 1;
diff --git a/src/x86.h b/src/x86.h
index 52e4f9b..b4df9c8 100644
--- a/src/x86.h
+++ b/src/x86.h
@@ -25,6 +25,9 @@
*/
// XXX: no BOUND (0x62): ambiguous with EVEX prefix - can't be arsed!
+// XXX: no LES (0xC4) or DES (0xC5) either for similar reasons. better to report
+// an unknown instruction than to potentially misinterpret an AVX thing.
+// these are all legacy instructions that won't really be used much anyway.
/* Instruction prefixes: segments */
#define X86_SEG_PREFIXES(X) \
@@ -188,10 +191,6 @@
X(X86_XOREAXI, 0x35) \
X(X86_CMPEAXI, 0x3D) \
X(X86_PUSHIW, 0x68) \
- X(X86_MOVALII, 0xA0) /* From offset (indirect) */ \
- X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \
- X(X86_MOVIIAL, 0xA2) /* To offset (indirect) */ \
- X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \
X(X86_TESTEAXI, 0xA9) \
X(X86_MOVEAXI, 0xB8) \
X(X86_MOVECXI, 0xB9) \
@@ -204,6 +203,13 @@
X(X86_CALL, 0xE8) \
X(X86_JMPIW, 0xE9)
+/* Single-byte opcodes with a word-sized immediate operand (indirect) */
+#define X86_OPS_1BYTE_IWI(X) \
+ X(X86_MOVALII, 0xA0) /* From offset (indirect) */ \
+ X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \
+ X(X86_MOVIIAL, 0xA2) /* To offset (indirect) */ \
+ X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \
+
/* Single-byte opcodes with 16-bit immediate operands, regardless of prefixes */
#define X86_OPS_1BYTE_I16(X) \
X(X86_RETI16, 0xC2) \
@@ -259,8 +265,6 @@
X(X86_LEA, 0x8D) \
X(X86_MOVSM, 0x8E) /* Store 4 bytes to segment register */ \
X(X86_POPM, 0x8F) \
- X(X86_LES, 0xC4) \
- X(X86_LDS, 0xC5) \
X(X86_SHIFTM18, 0xD0) /* Shift/roll by 1 place */ \
X(X86_SHIFTM1W, 0xD1) /* Shift/roll by 1 place */ \
X(X86_SHIFTMCL8, 0xD2) /* Shift/roll by CL places */ \
@@ -297,6 +301,7 @@
X86_OPS_1BYTE_NO(X) \
X86_OPS_1BYTE_I8(X) \
X86_OPS_1BYTE_IW(X) \
+ X86_OPS_1BYTE_IWI(X) \
X86_OPS_1BYTE_I16(X) \
X86_OPS_1BYTE_MRM(X) \
X86_OPS_1BYTE_MRM_I8(X) \