From 43c64eee8dd08d61d029be5a30c0edc098d282ab Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Sat, 7 Sep 2024 12:57:38 +0100 Subject: Un-break and re-fix x86 The last fix was, uh, not good. With any luck this is actually correct now. Certainly, running many millions of test cases fails to find any mismatch with udis, so it's at least a lot less wrong than it was. --- src/x86.c | 4 ++-- src/x86.h | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/x86.c b/src/x86.c index e0431d6..5399af8 100644 --- a/src/x86.c +++ b/src/x86.c @@ -25,7 +25,6 @@ static int mrmsib(const uchar *p, int addrlen) { // But it's confusingly-written enough that the code I wrote before didn't // work, so with any luck nobody will need to refer to it again and this is // actually correct now. Fingers crossed. - if ((*p & 0xC6) == 0x06) return 3; // special case for disp16 if (addrlen == 4 || *p & 0xC0) { int sib = addrlen == 4 && *p < 0xC0 && (*p & 7) == 4; switch (*p & 0xC0) { @@ -41,7 +40,7 @@ static int mrmsib(const uchar *p, int addrlen) { case 0x80: return 1 + addrlen + sib; } } - if (addrlen == 2 && *p == 0x26) return 3; + if (addrlen == 2 && (*p & 0xC7) == 0x06) return 3; return 1; // note: include the mrm itself in the byte count } @@ -66,6 +65,7 @@ P: X86_SEG_PREFIXES(CASES) X86_OPS_1BYTE_NO(CASES) return pfxlen + 1; X86_OPS_1BYTE_I8(CASES) operandlen = 1; X86_OPS_1BYTE_IW(CASES) return pfxlen + 1 + operandlen; + X86_OPS_1BYTE_IWI(CASES) return pfxlen + 1 + addrlen; X86_OPS_1BYTE_I16(CASES) return pfxlen + 3; X86_OPS_1BYTE_MRM(CASES) return pfxlen + 1 + mrmsib(insn + 1, addrlen); X86_OPS_1BYTE_MRM_I8(CASES) operandlen = 1; diff --git a/src/x86.h b/src/x86.h index 52e4f9b..b4df9c8 100644 --- a/src/x86.h +++ b/src/x86.h @@ -25,6 +25,9 @@ */ // XXX: no BOUND (0x62): ambiguous with EVEX prefix - can't be arsed! +// XXX: no LES (0xC4) or LDS (0xC5) either for similar reasons. 
better to report +// an unknown instruction than to potentially misinterpret an AVX thing. +// these are all legacy instructions that won't really be used much anyway. /* Instruction prefixes: segments */ #define X86_SEG_PREFIXES(X) \ @@ -188,10 +191,6 @@ X(X86_XOREAXI, 0x35) \ X(X86_CMPEAXI, 0x3D) \ X(X86_PUSHIW, 0x68) \ - X(X86_MOVALII, 0xA0) /* From offset (indirect) */ \ - X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \ - X(X86_MOVIIAL, 0xA2) /* To offset (indirect) */ \ - X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \ X(X86_TESTEAXI, 0xA9) \ X(X86_MOVEAXI, 0xB8) \ X(X86_MOVECXI, 0xB9) \ @@ -204,6 +203,13 @@ X(X86_CALL, 0xE8) \ X(X86_JMPIW, 0xE9) +/* Single-byte opcodes with a word-sized immediate operand (indirect) */ +#define X86_OPS_1BYTE_IWI(X) \ + X(X86_MOVALII, 0xA0) /* From offset (indirect) */ \ + X(X86_MOVEAXII, 0xA1) /* From offset (indirect) */ \ + X(X86_MOVIIAL, 0xA2) /* To offset (indirect) */ \ + X(X86_MOVIIEAX, 0xA3) /* To offset (indirect) */ \ + /* Single-byte opcodes with 16-bit immediate operands, regardless of prefixes */ #define X86_OPS_1BYTE_I16(X) \ X(X86_RETI16, 0xC2) \ @@ -259,8 +265,6 @@ X(X86_LEA, 0x8D) \ X(X86_MOVSM, 0x8E) /* Store 4 bytes to segment register */ \ X(X86_POPM, 0x8F) \ - X(X86_LES, 0xC4) \ - X(X86_LDS, 0xC5) \ X(X86_SHIFTM18, 0xD0) /* Shift/roll by 1 place */ \ X(X86_SHIFTM1W, 0xD1) /* Shift/roll by 1 place */ \ X(X86_SHIFTMCL8, 0xD2) /* Shift/roll by CL places */ \ @@ -297,6 +301,7 @@ X86_OPS_1BYTE_NO(X) \ X86_OPS_1BYTE_I8(X) \ X86_OPS_1BYTE_IW(X) \ + X86_OPS_1BYTE_IWI(X) \ X86_OPS_1BYTE_I16(X) \ X86_OPS_1BYTE_MRM(X) \ X86_OPS_1BYTE_MRM_I8(X) \ -- cgit v1.2.3