/*
 * simplex86: declarations for a small x86/x86-64 assembler that is driven
 * by arrays of 64-bit operand words built with the macros below.
 */
#ifndef SIMPLEX86_H
#define SIMPLEX86_H

#include <stddef.h>     /* size_t */
#include <stdint.h>     /* uint8_t, int32_t, int64_t, uint64_t */
#include <sys/types.h>  /* ssize_t, used by IMM() and IMM64() */

/* Every instruction and operand is encoded in one 64-bit word. */
typedef uint64_t op_t;

typedef struct fragment_t fragment_t;
typedef struct assembler_t assembler_t;

/* Public API (implemented outside this header) */
assembler_t *assembler_new           (const char  *prefix);
void         assembler_set_verbose   (assembler_t *as,
                                      int          verbose);
uint8_t *    assembler_link          (assembler_t *as,
                                      fragment_t  *fragment,
                                      ...);
void         assembler_free          (assembler_t *as,
                                      uint8_t     *code);
fragment_t * fragment_new            (assembler_t *as);
void         fragment_assemble       (fragment_t     *a,
                                      const uint64_t *code);
size_t       assembler_get_last_size (assembler_t *as);

/*
 * BEGIN_ASM (frag) ... END_ASM ()
 *
 * BEGIN_ASM opens a block, stores @frag in a local and starts the
 * initializer of a local array of 64-bit words. Everything written
 * between the two macros becomes the array contents, so it must be a
 * comma separated list of instruction and operand words with a trailing
 * comma. END_ASM appends a terminating 0 (never a valid operand, see
 * OP_INVALID), closes the array, and passes it to fragment_assemble().
 *
 * The local names __frag and __c are visible to the code in between.
 */
#define BEGIN_ASM(frag)                                                 \
    {                                                                   \
        fragment_t *__frag = (frag);                                    \
        const uint64_t __c[] = {

#define END_ASM()                                                       \
            0                                                           \
        };                                                              \
                                                                        \
        fragment_assemble (__frag, __c);                                \
    };

/* Operands
 *
 * The operand type lives in the low 6 bits of every encoded word, so
 * OP_N_OP_TYPES must stay below 64.
 */
typedef enum
{
    OP_INVALID,         /* Ensures that (op_t)0 is not a valid op */

    OP_1,
    OP_I8,
    OP_I16,
    OP_I32,

    OP_CL,
    OP_AL,
    OP_AX,
    OP_EAX,
    OP_RAX,

    OP_GP8,
    OP_GP16,
    OP_GP32,
    OP_GP64,

    OP_MM,
    OP_XMM,
    OP_YMM,

    /* The sized variants must directly follow OP_MEM, in this order:
     * the *_PTR size directives below are offsets from OP_MEM.
     */
    OP_MEM,
    OP_MEM8,
    OP_MEM16,
    OP_MEM32,
    OP_MEM64,

    OP_LABEL_REF,

    OP_RIP_REL,
    OP_RIP_REL8,
    OP_RIP_REL16,
    OP_RIP_REL32,
    OP_RIP_REL64,

    OP_FAKE_REG,

    OP_INST,

    OP_N_OP_TYPES
} op_type_t;

/* Size directives. Use like this:
 *
 *     I_mov, QWORD_PTR + PTR (rax), IMM (17),
 *
 * They are only required when the operand size can't
 * be otherwise determined.
 *
 * Adding one of them to a memory operand turns its OP_MEM type field
 * into the corresponding sized OP_MEMnn type.
 */
#define BYTE_PTR        (OP_MEM8 - OP_MEM)
#define WORD_PTR        (OP_MEM16 - OP_MEM)
#define DWORD_PTR       (OP_MEM32 - OP_MEM)
#define QWORD_PTR       (OP_MEM64 - OP_MEM)

/* Registers are encoded as (register number << 6) | operand type.
 * al, cl, ax, eax and rax carry their own dedicated operand types;
 * all other general purpose registers use the generic OP_GPnn types.
 */
typedef enum
{
    /* fake registers */
    NO_REG = ( 0 << 6) | OP_FAKE_REG,
    rip    = ( 1 << 6) | OP_FAKE_REG,

    /* 8 bit registers */
    al     = ( 0 << 6) | OP_AL,
    cl     = ( 1 << 6) | OP_CL,
    dl     = ( 2 << 6) | OP_GP8,
    bl     = ( 3 << 6) | OP_GP8,
    ah     = ( 4 << 6) | OP_GP8,
    ch     = ( 5 << 6) | OP_GP8,
    dh     = ( 6 << 6) | OP_GP8,
    bh     = ( 7 << 6) | OP_GP8,
    r8l    = ( 8 << 6) | OP_GP8,
    r9l    = ( 9 << 6) | OP_GP8,
    r10l   = (10 << 6) | OP_GP8,
    r11l   = (11 << 6) | OP_GP8,
    r12l   = (12 << 6) | OP_GP8,
    r13l   = (13 << 6) | OP_GP8,
    r14l   = (14 << 6) | OP_GP8,
    r15l   = (15 << 6) | OP_GP8,

    /* 16 bit registers */
    ax     = ( 0 << 6) | OP_AX,
    cx     = ( 1 << 6) | OP_GP16,
    dx     = ( 2 << 6) | OP_GP16,
    bx     = ( 3 << 6) | OP_GP16,
    sp     = ( 4 << 6) | OP_GP16,
    bp     = ( 5 << 6) | OP_GP16,
    si     = ( 6 << 6) | OP_GP16,
    di     = ( 7 << 6) | OP_GP16,
    r8w    = ( 8 << 6) | OP_GP16,
    r9w    = ( 9 << 6) | OP_GP16,
    r10w   = (10 << 6) | OP_GP16,
    r11w   = (11 << 6) | OP_GP16,
    r12w   = (12 << 6) | OP_GP16,
    r13w   = (13 << 6) | OP_GP16,
    r14w   = (14 << 6) | OP_GP16,
    r15w   = (15 << 6) | OP_GP16,

    /* 32 bit registers */
    eax    = ( 0 << 6) | OP_EAX,
    ecx    = ( 1 << 6) | OP_GP32,
    edx    = ( 2 << 6) | OP_GP32,
    ebx    = ( 3 << 6) | OP_GP32,
    esp    = ( 4 << 6) | OP_GP32,
    ebp    = ( 5 << 6) | OP_GP32,
    esi    = ( 6 << 6) | OP_GP32,
    edi    = ( 7 << 6) | OP_GP32,
    r8d    = ( 8 << 6) | OP_GP32,
    r9d    = ( 9 << 6) | OP_GP32,
    r10d   = (10 << 6) | OP_GP32,
    r11d   = (11 << 6) | OP_GP32,
    r12d   = (12 << 6) | OP_GP32,
    r13d   = (13 << 6) | OP_GP32,
    r14d   = (14 << 6) | OP_GP32,
    r15d   = (15 << 6) | OP_GP32,

    /* 64 bit registers */
    rax    = ( 0 << 6) | OP_RAX,
    rcx    = ( 1 << 6) | OP_GP64,
    rdx    = ( 2 << 6) | OP_GP64,
    rbx    = ( 3 << 6) | OP_GP64,
    rsp    = ( 4 << 6) | OP_GP64,
    rbp    = ( 5 << 6) | OP_GP64,
    rsi    = ( 6 << 6) | OP_GP64,
    rdi    = ( 7 << 6) | OP_GP64,
    r8     = ( 8 << 6) | OP_GP64,
    r9     = ( 9 << 6) | OP_GP64,
    r10    = (10 << 6) | OP_GP64,
    r11    = (11 << 6) | OP_GP64,
    r12    = (12 << 6) | OP_GP64,
    r13    = (13 << 6) | OP_GP64,
    r14    = (14 << 6) | OP_GP64,
    r15    = (15 << 6) | OP_GP64,

    /* 64 bit mmx registers */
    mm0    = ( 0 << 6) | OP_MM,
    mm1    = ( 1 << 6) | OP_MM,
    mm2    = ( 2 << 6) | OP_MM,
    mm3    = ( 3 << 6) | OP_MM,
    mm4    = ( 4 << 6) | OP_MM,
    mm5    = ( 5 << 6) | OP_MM,
    mm6    = ( 6 << 6) | OP_MM,
    mm7    = ( 7 << 6) | OP_MM,

    /* 128 bit sse registers */
    xmm0   = ( 0 << 6) | OP_XMM,
    xmm1   = ( 1 << 6) | OP_XMM,
    xmm2   = ( 2 << 6) | OP_XMM,
    xmm3   = ( 3 << 6) | OP_XMM,
    xmm4   = ( 4 << 6) | OP_XMM,
    xmm5   = ( 5 << 6) | OP_XMM,
    xmm6   = ( 6 << 6) | OP_XMM,
    xmm7   = ( 7 << 6) | OP_XMM,
    xmm8   = ( 8 << 6) | OP_XMM,
    xmm9   = ( 9 << 6) | OP_XMM,
    xmm10  = (10 << 6) | OP_XMM,
    xmm11  = (11 << 6) | OP_XMM,
    xmm12  = (12 << 6) | OP_XMM,
    xmm13  = (13 << 6) | OP_XMM,
    xmm14  = (14 << 6) | OP_XMM,
    xmm15  = (15 << 6) | OP_XMM,

    /* 256 bit avx registers */
    ymm0   = ( 0 << 6) | OP_YMM,
    ymm1   = ( 1 << 6) | OP_YMM,
    ymm2   = ( 2 << 6) | OP_YMM,
    ymm3   = ( 3 << 6) | OP_YMM,
    ymm4   = ( 4 << 6) | OP_YMM,
    ymm5   = ( 5 << 6) | OP_YMM,
    ymm6   = ( 6 << 6) | OP_YMM,
    ymm7   = ( 7 << 6) | OP_YMM,
    ymm8   = ( 8 << 6) | OP_YMM,
    ymm9   = ( 9 << 6) | OP_YMM,
    ymm10  = (10 << 6) | OP_YMM,
    ymm11  = (11 << 6) | OP_YMM,
    ymm12  = (12 << 6) | OP_YMM,
    ymm13  = (13 << 6) | OP_YMM,
    ymm14  = (14 << 6) | OP_YMM,
    ymm15  = (15 << 6) | OP_YMM,
} reg_t;

/* Assume here that the string pointer will only make use of at most
 * 58 of the 64 bits. This is safe since a virtual pointer on x86-64
 * is currently at most 48 bits.
 */
#define LABEL(l)                                                        \
    ((((uint64_t)(size_t)(l)) << 6) | OP_LABEL_REF)

#define RIP_REL(label)                                                  \
    ((((uint64_t)(size_t)(label)) << 6) | OP_RIP_REL)

/* base_reg + (index_reg << shift) + disp */
/* Layout:
 * [disp: 32] [4] [base op: 10] [index: 10] [shift: 2] [op_type: 6]
 *
 * base_reg and index_reg are reg_t values (NO_REG when absent); the
 * scale argument is written as the multiplier 1, 2, 4 or 8.
 */
#define INDEX(base_reg, disp, index_reg, shift)                         \
    ((OP_MEM << 0)                      |                               \
     (__SHIFT(shift) << 6)              |                               \
     (((index_reg)) << 8)               |                               \
     (((base_reg)) << 18)               |                               \
     ((uint64_t)(disp)) << 32)

/* Maps a scale factor of 1, 2, 4 or 8 to its 2-bit encoding.
 * Any other value silently encodes as 0, i.e. a scale of 1.
 */
#define __SHIFT(v)                                                      \
    (((v) == 8)? 3 : ((v) == 4)? 2 :                                    \
     ((v) == 2) ? 1 : ((v) == 1)? 0 : 0)

/* Immediate operand. The operand type is the smallest of OP_1, OP_I8,
 * OP_I16 and OP_I32 that can hold @v, and the value is stored above the
 * 6 type bits.
 *
 * The shift is performed on the unsigned representation because left
 * shifting a negative signed value is undefined behaviour in C; the
 * resulting bit pattern is the two's complement one.
 *
 * Note that @v is evaluated several times.
 */
#define IMM(v)                                                          \
    (((int64_t)(((uint64_t)(int64_t)(ssize_t)(v)) << 6))        |       \
     (((v) == 1)? OP_1 :                                                \
      ((v) >= -128 && (v) < 128)? OP_I8 :                               \
      ((v) >= -32768 && (v) < 32768)? OP_I16 :                          \
      OP_I32))

/* A 64 bit immediate expands to TWO operand words: the high 32 bits
 * followed by the low 32 bits.
 */
#define IMM64(v)                                                        \
    IMM ((((int64_t)(ssize_t)(v)) >> 32)), IMM ((int32_t)(ssize_t)(v))

/* Memory operand shorthands built on INDEX() */
#define ADDRESS32(addr)                                                 \
    INDEX(NO_REG, addr, NO_REG, 1)

#define PTR(reg_op)                                                     \
    INDEX (reg_op, 0, NO_REG, 1)

#define BASE(reg_op, disp)                                              \
    INDEX (reg_op, disp, NO_REG, 1)

/* Emits an 8-aligned, labelled 64 bit constant */
#define DEFINE_VALUE64(name, value)                                     \
    I_align, IMM (8),                                                   \
    I_label, LABEL (name),                                              \
    I_dq, IMM64 (value)

#define DEFINE_LABEL(name)                                              \
    I_label, LABEL (name)

/* Helpers for the instruction table below.
 *
 * PROCESS_SIMD declares both the legacy instruction 'name' and its
 * 'v'-prefixed counterpart, each with its own read/write info and
 * operand count. The FLOAT variants append the ps/ss and pd/sd suffixes.
 */
#define PROCESS_SIMD(name, rwinfo, n_ops, rwinfo_v, n_vops)             \
    PROCESS_INSTRUCTION (name, rwinfo, n_ops)                           \
    PROCESS_INSTRUCTION (v##name, rwinfo_v, n_vops)

#define PROCESS_SIMD_FLOAT_SINGLE(name, rw, n_ops, vrw, n_vops)         \
    PROCESS_SIMD (name##ps, rw, n_ops, vrw, n_vops)                     \
    PROCESS_SIMD (name##ss, rw, n_ops, vrw, n_vops)

#define PROCESS_SIMD_FLOAT_DOUBLE(name, rw, n_ops, vrw, n_vops)         \
    PROCESS_SIMD (name##pd, rw, n_ops, vrw, n_vops)                     \
    PROCESS_SIMD (name##sd, rw, n_ops, vrw, n_vops)

#define PROCESS_SIMD_FLOAT(name, rw, n_ops, vrw, n_vops)                \
    PROCESS_SIMD_FLOAT_SINGLE(name, rw, n_ops, vrw, n_vops)             \
    PROCESS_SIMD_FLOAT_DOUBLE(name, rw, n_ops, vrw, n_vops)

#define PROCESS_XOP_COMPARE(type)                                       \
    PROCESS_INSTRUCTION (vpcomlt##type, WRR, 3)                         \
    PROCESS_INSTRUCTION (vpcomle##type, WRR, 3)                         \
    PROCESS_INSTRUCTION (vpcomgt##type, WRR, 3)                         \
    PROCESS_INSTRUCTION (vpcomge##type, WRR, 3)                         \
    PROCESS_INSTRUCTION (vpcome##type, WRR, 3)                          \
    PROCESS_INSTRUCTION (vpcomne##type, WRR, 3)                         \
    PROCESS_INSTRUCTION (vpcomfalse##type, WRR, 3)                      \
    PROCESS_INSTRUCTION (vpcomtrue##type, WRR, 3)

/* Read/write info: two bits per operand, operand 0 in the lowest bits.
 * The multi-letter names list the operands in order, first operand
 * first (e.g. WR: operand 0 is written, operand 1 is read).
 */
#define R       (0x01)  /* Read */
#define W       (0x02)  /* Write */
#define B       (0x03)  /* Both (Read-Write) */
#define NA      (0x00)  /* Not applicable (No variables) */

#define RR      ((R << 2) | R)
#define WR      ((R << 2) | W)
#define BR      ((R << 2) | B)
#define BB      ((B << 2) | B)
#define BRR     ((R << 4) | (R << 2) | B)
#define WRR     ((R << 4) | (R << 2) | W)
#define BRB     ((B << 4) | (R << 2) | B)
#define WRRR    ((R << 6) | (R << 4) | (R << 2) | W)
#define BRRR    ((R << 6) | (R << 4) | (R << 2) | B)

/* The instruction table: PROCESS_INSTRUCTION (mnemonic, rwinfo, n_ops).
 *
 * Define PROCESS_INSTRUCTION, expand ALL_INSTRUCTIONS, then #undef it.
 * 64 bit immediates count as two operands (see IMM64), which is why
 * dq is listed with two operands.
 *
 * NOTE(review): a few entries look inconsistent and should be checked
 * against the code that consumes this table: movabs and nop1 declare
 * more operands than their rwinfo describes, the vpextr* forms declare
 * 3 operands with the 4-operand WRRR, and fxrstor is marked W exactly
 * like fxsave. They are reproduced here unchanged.
 */
#define ALL_INSTRUCTIONS                                                \
    PROCESS_INSTRUCTION (dq, RR, 2)                                     \
    PROCESS_INSTRUCTION (dd, R, 1)                                      \
    PROCESS_INSTRUCTION (dw, R, 1)                                      \
    PROCESS_INSTRUCTION (db, R, 1)                                      \
    PROCESS_INSTRUCTION (cbw, NA, 0)                                    \
    PROCESS_INSTRUCTION (cwde, NA, 0)                                   \
    PROCESS_INSTRUCTION (cdqe, NA, 0)                                   \
    PROCESS_INSTRUCTION (bswap, B, 1)                                   \
    PROCESS_INSTRUCTION (movbe, WR, 2)                                  \
    PROCESS_INSTRUCTION (label, R, 1)                                   \
    PROCESS_INSTRUCTION (align, R, 1)                                   \
    PROCESS_INSTRUCTION (lock, NA, 0)                                   \
    PROCESS_INSTRUCTION (pushf, NA, 0)                                  \
    PROCESS_INSTRUCTION (popf, NA, 0)                                   \
    PROCESS_INSTRUCTION (cpuid, NA, 0)                                  \
    PROCESS_INSTRUCTION (xgetbv, NA, 0)                                 \
    PROCESS_INSTRUCTION (nop, NA, 0)                                    \
    PROCESS_INSTRUCTION (nop1, NA, 1)                                   \
    PROCESS_INSTRUCTION (ret, NA, 0)                                    \
    PROCESS_INSTRUCTION (push, R, 1)                                    \
    PROCESS_INSTRUCTION (pop, W, 1)                                     \
    PROCESS_INSTRUCTION (lea, WR, 2)                                    \
    PROCESS_INSTRUCTION (xchg, BB, 2)                                   \
    PROCESS_INSTRUCTION (bsf, WR, 2)                                    \
    PROCESS_INSTRUCTION (bsr, WR, 2)                                    \
    PROCESS_INSTRUCTION (mfence, NA, 0)                                 \
    PROCESS_INSTRUCTION (lfence, NA, 0)                                 \
    PROCESS_INSTRUCTION (mov, WR, 2)                                    \
    PROCESS_INSTRUCTION (movabs, WR, 3)                                 \
    PROCESS_INSTRUCTION (movd, WR, 2)                                   \
    PROCESS_INSTRUCTION (vmovd, WR, 2)                                  \
    PROCESS_INSTRUCTION (movq, WR, 2)                                   \
    PROCESS_INSTRUCTION (vmovq, WR, 2)                                  \
    PROCESS_INSTRUCTION (movq2dq, WR, 2)                                \
    PROCESS_INSTRUCTION (shrd, BRR, 3)                                  \
    PROCESS_INSTRUCTION (shld, BRR, 3)                                  \
    PROCESS_INSTRUCTION (rdtsc, NA, 0)                                  \
    PROCESS_INSTRUCTION (rdtscp, NA, 0)                                 \
    PROCESS_INSTRUCTION (clflush, R, 1)                                 \
    PROCESS_INSTRUCTION (test, RR, 2)                                   \
    PROCESS_INSTRUCTION (clc, NA, 0)                                    \
    PROCESS_INSTRUCTION (cld, NA, 0)                                    \
    PROCESS_INSTRUCTION (add, BR, 2)                                    \
    PROCESS_INSTRUCTION (or, BR, 2)                                     \
    PROCESS_INSTRUCTION (adc, BR, 2)                                    \
    PROCESS_INSTRUCTION (sbb, BR, 2)                                    \
    PROCESS_INSTRUCTION (and, BR, 2)                                    \
    PROCESS_INSTRUCTION (sub, BR, 2)                                    \
    PROCESS_INSTRUCTION (xor, BR, 2)                                    \
    PROCESS_INSTRUCTION (cmp, RR, 2)                                    \
    PROCESS_INSTRUCTION (rol, BR, 2)                                    \
    PROCESS_INSTRUCTION (ror, BR, 2)                                    \
    PROCESS_INSTRUCTION (rcl, BR, 2)                                    \
    PROCESS_INSTRUCTION (rcr, BR, 2)                                    \
    PROCESS_INSTRUCTION (shl, BR, 2)                                    \
    PROCESS_INSTRUCTION (shr, BR, 2)                                    \
    PROCESS_INSTRUCTION (sal, BR, 2)                                    \
    PROCESS_INSTRUCTION (sar, BR, 2)                                    \
    PROCESS_INSTRUCTION (movzx, WR, 2)                                  \
    PROCESS_INSTRUCTION (movsx, WR, 2)                                  \
    PROCESS_INSTRUCTION (jmp, R, 1)                                     \
    PROCESS_INSTRUCTION (call, R, 1)                                    \
    PROCESS_INSTRUCTION (ja, R, 1)                                      \
    PROCESS_INSTRUCTION (jae, R, 1)                                     \
    PROCESS_INSTRUCTION (jb, R, 1)                                      \
    PROCESS_INSTRUCTION (jbe, R, 1)                                     \
    PROCESS_INSTRUCTION (jc, R, 1)                                      \
    PROCESS_INSTRUCTION (je, R, 1)                                      \
    PROCESS_INSTRUCTION (jg, R, 1)                                      \
    PROCESS_INSTRUCTION (jge, R, 1)                                     \
    PROCESS_INSTRUCTION (jl, R, 1)                                      \
    PROCESS_INSTRUCTION (jle, R, 1)                                     \
    PROCESS_INSTRUCTION (jna, R, 1)                                     \
    PROCESS_INSTRUCTION (jnae, R, 1)                                    \
    PROCESS_INSTRUCTION (jnb, R, 1)                                     \
    PROCESS_INSTRUCTION (jnbe, R, 1)                                    \
    PROCESS_INSTRUCTION (jnc, R, 1)                                     \
    PROCESS_INSTRUCTION (jne, R, 1)                                     \
    PROCESS_INSTRUCTION (jng, R, 1)                                     \
    PROCESS_INSTRUCTION (jnge, R, 1)                                    \
    PROCESS_INSTRUCTION (jnl, R, 1)                                     \
    PROCESS_INSTRUCTION (jnle, R, 1)                                    \
    PROCESS_INSTRUCTION (jno, R, 1)                                     \
    PROCESS_INSTRUCTION (jnp, R, 1)                                     \
    PROCESS_INSTRUCTION (jns, R, 1)                                     \
    PROCESS_INSTRUCTION (jnz, R, 1)                                     \
    PROCESS_INSTRUCTION (jo, R, 1)                                      \
    PROCESS_INSTRUCTION (jp, R, 1)                                      \
    PROCESS_INSTRUCTION (jpe, R, 1)                                     \
    PROCESS_INSTRUCTION (jpo, R, 1)                                     \
    PROCESS_INSTRUCTION (js, R, 1)                                      \
    PROCESS_INSTRUCTION (jz, R, 1)                                      \
    PROCESS_INSTRUCTION (not, B, 1)                                     \
    PROCESS_INSTRUCTION (neg, B, 1)                                     \
    PROCESS_INSTRUCTION (mul, B, 1)                                     \
    /* The imul instruction has three different forms, with             \
     * one, two, and three operands respectively. Since we rely         \
     * on the ability to determine the number of operands based on      \
     * the mnemonic alone, we have to make up three different           \
     * instruction names.                                               \
     */                                                                 \
    PROCESS_INSTRUCTION (imul1, R, 1)                                   \
    PROCESS_INSTRUCTION (imul2, BR, 2)                                  \
    PROCESS_INSTRUCTION (imul3, WRR, 3)                                 \
    PROCESS_SIMD (movdqa, WR, 2, WR, 2)                                 \
    PROCESS_SIMD (movdqu, WR, 2, WR, 2)                                 \
    PROCESS_SIMD (lddqu, WR, 2, WR, 2)                                  \
    PROCESS_SIMD (palignr, BRR, 3, WRRR, 4)                             \
    PROCESS_SIMD (packsswb, BR, 2, WRR, 3)                              \
    PROCESS_SIMD (packssdw, BR, 2, WRR, 3)                              \
    PROCESS_SIMD (packuswb, BR, 2, WRR, 3)                              \
    PROCESS_SIMD (pand, BR, 2, WRR, 3)                                  \
    PROCESS_SIMD (pandn, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (por, BR, 2, WRR, 3)                                   \
    PROCESS_SIMD (pxor, BR, 2, WRR, 3)                                  \
    PROCESS_SIMD (pavgb, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (pavgw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (paddb, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (paddw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (paddd, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (paddq, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (paddsb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (paddsw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (paddusb, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (paddusw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpeqb, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpeqw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpeqd, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpgtb, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpgtw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpgtd, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pmaddwd, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pmaxsw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmaxub, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminsw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminub, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmulhuw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pmulhw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmullw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmuludq, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (psadbw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (psubb, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psubw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psubd, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psubq, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psubsb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (psubsw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (psubusb, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (psubusw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (punpckhbw, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (punpckhwd, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (punpckhdq, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (punpcklbw, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (punpcklwd, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (punpckldq, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (psllw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (pslld, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psllq, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psrlw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psrld, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psrlq, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psraw, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (psrad, BR, 2, WRR, 3)                                 \
    PROCESS_SIMD (pinsrw, BRR, 3, WRRR, 4)                              \
    PROCESS_SIMD (pextrw, BRR, 3, WRRR, 3)                              \
    PROCESS_SIMD (pabsb, WR, 2, WR, 2)                                  \
    PROCESS_SIMD (pabsw, WR, 2, WR, 2)                                  \
    PROCESS_SIMD (pabsd, WR, 2, WR, 2)                                  \
    PROCESS_SIMD (ptest, WR, 2, WR, 2)                                  \
    PROCESS_SIMD (phminposuw, WR, 2, WR, 2)                             \
    PROCESS_SIMD (pmovsxbw, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxbw, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovsxbd, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxbd, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovsxbq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxbq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovsxwd, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxwd, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovsxwq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxwq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovsxdq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (pmovzxdq, WR, 2, WR, 2)                               \
    PROCESS_SIMD (psignb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (psignw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (psignd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (packusdw, BR, 2, WRR, 3)                              \
    PROCESS_SIMD (pcmpeqq, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pcmpgtq, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (phaddw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (phaddd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (phaddsw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (phsubw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (phsubd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (phsubsw, BR, 2, WRR, 3)                               \
    PROCESS_SIMD (pmaddubsw, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (pmaxsb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmaxsd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmaxuw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmaxud, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminsb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminsd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminud, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pminuw, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmuldq, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pmulhrsw, BR, 2, WRR, 3)                              \
    PROCESS_SIMD (pmulld, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pshufb, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (pshufhw, WRR, 3, WRR, 3)                              \
    PROCESS_SIMD (pshuflw, WRR, 3, WRR, 3)                              \
    PROCESS_SIMD (pmovmskb, WR, 2, WR, 2)                               \
    PROCESS_SIMD_FLOAT (add, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (and, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (andn, BR, 2, WRR, 3)                            \
    PROCESS_SIMD (blendps, BRR, 3, WRRR, 4)                             \
    PROCESS_SIMD (blendpd, BRR, 3, WRRR, 4)                             \
    PROCESS_SIMD_FLOAT (div, BR, 2, WRR, 3)                             \
    PROCESS_SIMD (hsubps, BR, 2, WRR, 3)                                \
    PROCESS_SIMD (hsubpd, BR, 2, WRR, 3)                                \
    PROCESS_SIMD_FLOAT (max, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (min, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (mul, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (or, BR, 2, WRR, 3)                              \
    PROCESS_SIMD_FLOAT (round, BRR, 3, WRRR, 4)                         \
    PROCESS_SIMD_FLOAT (sub, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT (xor, BR, 2, WRR, 3)                             \
    PROCESS_SIMD_FLOAT_SINGLE (rcp, WR, 2, WR, 2)                       \
    PROCESS_SIMD_FLOAT_SINGLE (rsqrt, WR, 2, WR, 2)                     \
    PROCESS_SIMD_FLOAT (sqrt, WR, 2, WR, 2)                             \
    PROCESS_SIMD_FLOAT (cmp, BRR, 3, WRRR, 4)                           \
    PROCESS_SIMD_FLOAT (shuf, BRR, 3, WRRR, 4)                          \
    PROCESS_INSTRUCTION (vcvtph2ps, WR, 2)                              \
    PROCESS_INSTRUCTION (vcvtps2ph, WRR, 3)                             \
    PROCESS_INSTRUCTION (vzeroall, NA, 0)                               \
    PROCESS_INSTRUCTION (vzeroupper, NA, 0)                             \
    PROCESS_INSTRUCTION (ldmxcsr, R, 1)                                 \
    PROCESS_INSTRUCTION (vldmxcsr, R, 1)                                \
    PROCESS_INSTRUCTION (stmxcsr, W, 1)                                 \
    PROCESS_INSTRUCTION (vstmxcsr, W, 1)                                \
    PROCESS_INSTRUCTION (fxsave, W, 1)                                  \
    PROCESS_INSTRUCTION (fxrstor, W, 1)                                 \
    PROCESS_INSTRUCTION (vmaskmovps, WRR, 3)                            \
    PROCESS_INSTRUCTION (vmaskmovpd, WRR, 3)                            \
    PROCESS_SIMD (pinsrb, BRR, 3, WRRR, 4)                              \
    PROCESS_SIMD (pinsrd, BRR, 3, WRRR, 4)                              \
    PROCESS_SIMD (pinsrq, BRR, 3, WRRR, 4)                              \
    PROCESS_SIMD (pextrb, BRR, 3, WRRR, 3)                              \
    PROCESS_SIMD (pextrd, BRR, 3, WRRR, 3)                              \
    PROCESS_SIMD (pextrq, BRR, 3, WRRR, 3)                              \
    PROCESS_INSTRUCTION (crc32, BR, 2)                                  \
    PROCESS_INSTRUCTION (vgatherdd, BRB, 3)                             \
    PROCESS_INSTRUCTION (vgatherqd, BRB, 3)                             \
    PROCESS_INSTRUCTION (vgatherdq, BRB, 3)                             \
    PROCESS_INSTRUCTION (vgatherqq, BRB, 3)                             \
    PROCESS_INSTRUCTION (vgatherdps, BRB, 3)                            \
    PROCESS_INSTRUCTION (vgatherqps, BRB, 3)                            \
    PROCESS_INSTRUCTION (vgatherdpd, BRB, 3)                            \
    PROCESS_INSTRUCTION (vgatherqpd, BRB, 3)                            \
    PROCESS_INSTRUCTION (andn, WRR, 3)                                  \
    PROCESS_INSTRUCTION (bextr, WRR, 3)                                 \
    PROCESS_INSTRUCTION (blsi, WR, 2)                                   \
    PROCESS_INSTRUCTION (blsmsk, WR, 2)                                 \
    PROCESS_INSTRUCTION (blsr, WR, 2)                                   \
    PROCESS_INSTRUCTION (bzhi, WRR, 3)                                  \
    PROCESS_INSTRUCTION (lzcnt, WR, 2)                                  \
    PROCESS_INSTRUCTION (popcnt, WR, 2)                                 \
    PROCESS_INSTRUCTION (mulx, WRR, 3)                                  \
    PROCESS_INSTRUCTION (pdep, WRR, 3)                                  \
    PROCESS_INSTRUCTION (pext, WRR, 3)                                  \
    PROCESS_INSTRUCTION (rorx, WRR, 3)                                  \
    PROCESS_INSTRUCTION (sarx, WRR, 3)                                  \
    PROCESS_INSTRUCTION (shlx, WRR, 3)                                  \
    PROCESS_INSTRUCTION (shrx, WRR, 3)                                  \
    PROCESS_INSTRUCTION (tzcnt, WR, 2)                                  \
    PROCESS_INSTRUCTION (vpcmov, WRRR, 4)                               \
    PROCESS_XOP_COMPARE (b)                                             \
    PROCESS_XOP_COMPARE (w)                                             \
    PROCESS_XOP_COMPARE (d)                                             \
    PROCESS_XOP_COMPARE (q)                                             \
    PROCESS_XOP_COMPARE (ub)                                            \
    PROCESS_XOP_COMPARE (ud)                                            \
    PROCESS_XOP_COMPARE (uq)                                            \
    PROCESS_XOP_COMPARE (uw)

/* Dense serial numbers, one per instruction, in table order.
 * I_none is 0, so real instructions start at 1.
 */
enum
{
    I_none,
#define PROCESS_INSTRUCTION(name, rwinfo, n_ops)                        \
    I_##name##__serial,

    ALL_INSTRUCTIONS

#undef PROCESS_INSTRUCTION
    I_n_instructions
};

/* Accessors for an encoded instruction word (see instruction_t):
 *
 *   bits  0-5   operand type, always OP_INST
 *   bits  6-8   number of operands
 *   bits  9-16  read/write info, two bits per operand
 *   bits 17-    serial number
 *
 * OP_READ and OP_WRITTEN yield zero or non-zero for operand number @op
 * (0 based); OP_WRITTEN yields 0x02 rather than 1 when set.
 */
#define GET_N_OPS(instruction)                                          \
    (((instruction) >> 6) & 7)

#define GET_SERIAL(instruction)                                         \
    (((instruction) >> 17))

#define OP_READ(instruction, op)                                        \
    ((((instruction) >> (9 + (op) * 2))) & 0x01)

#define OP_WRITTEN(instruction, op)                                     \
    ((((instruction) >> (9 + (op) * 2))) & 0x02)

/* The I_<mnemonic> words used in assembly listings */
typedef enum
{
#define PROCESS_INSTRUCTION(name, rw, n_ops)                            \
    I_##name = (I_##name##__serial << 17) | ((rw) << 9) | ((n_ops) << 6) | OP_INST,

    ALL_INSTRUCTIONS

#undef PROCESS_INSTRUCTION
} instruction_t;

#endif