author     Søren Sandmann Pedersen <ssp@redhat.com>	2013-02-20 17:33:03 -0500
committer  Søren Sandmann Pedersen <ssp@redhat.com>	2013-02-20 17:33:03 -0500
commit     655a156a424da6ae409cc4311ac72057d44fb87b (patch)
tree       737b12480e762fc37474985715eab66a1ad282ec
parent     64535c7dabb4cd3dac639c659feec49cd4548152 (diff)
Organize instruction table by SSE level
-rw-r--r--	simplex86.c	335
1 files changed, 173 insertions, 162 deletions
diff --git a/simplex86.c b/simplex86.c
index 8035b0a..512fba6 100644
--- a/simplex86.c
+++ b/simplex86.c
@@ -231,34 +231,6 @@ static const variant_t variants[] =
* different name, and movabs is what GNU as uses.
*/
{ I_movabs, { A_R64, A_I32, A_I32 }, F_386, E_O, 0xb8 },
- { I_movq, { A_MMX, A_MMXM }, F_MMX, E_RM, 0x0f6f, NO_REX_W },
- { I_movq, { A_MMXM, A_MMX }, F_MMX, E_MR, 0x0f7f, NO_REX_W },
- { I_movq, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f7e, PRE_F3 | NO_REX_W },
- { I_movq, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0fd6, PRE_66 | NO_REX_W },
- { I_movq2dq, { A_SSE, A_MMX }, F_SSE2, E_RM, 0x0fd6, PRE_F3 | NO_REX_W },
- { I_movdqa, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f6f, PRE_66 },
- { I_movdqa, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0f7f, PRE_66 },
- { I_vmovdqa, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x6f, VEX(128,0x66,0x0f,WIG) },
- { I_vmovdqa, { A_SSEM, A_SSE }, F_AVX, E_MR, 0x7f, VEX(128,0x66,0x0f,WIG) },
- { I_vmovdqa, { A_AVX, A_AVXM }, F_AVX, E_RM, 0x6f, VEX(256,0x66,0x0f,WIG) },
- { I_vmovdqa, { A_AVXM, A_AVX }, F_AVX, E_MR, 0x7f, VEX(256,0x66,0x0f,WIG) },
- { I_movdqu, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f6f, PRE_F3 },
- { I_movdqu, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0f7f, PRE_F3 },
- { I_vmovdqu, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x6f, VEX(128,0xf3,0x0f,WIG) },
- { I_vmovdqu, { A_SSEM, A_SSE }, F_AVX, E_MR, 0x7f, VEX(128,0xf3,0x0f,WIG) },
- { I_vmovdqu, { A_AVX, A_AVXM }, F_AVX, E_RM, 0x6f, VEX(256,0xf3,0x0f,WIG) },
- { I_vmovdqu, { A_AVXM, A_AVX }, F_AVX, E_MR, 0x7f, VEX(256,0xf3,0x0f,WIG) },
- { I_lddqu, { A_SSE, A_MEM }, F_SSE3, E_RM, 0x0ff0, PRE_F2 },
- { I_vlddqu, { A_SSE, A_MEM }, F_AVX, E_RM, 0xf0, VEX(128,0xf2,0x0f,WIG) },
- { I_vlddqu, { A_AVX, A_MEM }, F_AVX, E_RM, 0xf0, VEX(256,0xf2,0x0f,WIG) },
- { I_vmaskmovps, { A_SSE, A_SSE, A_MEM }, F_AVX, E_RVM, 0x2c, VEX(128,0x66,0x0f38,W0) },
- { I_vmaskmovps, { A_AVX, A_AVX, A_MEM }, F_AVX, E_RVM, 0x2c, VEX(256,0x66,0x0f38,W0) },
- { I_vmaskmovps, { A_MEM, A_SSE, A_SSE }, F_AVX, E_MVR, 0x2e, VEX(128,0x66,0x0f38,W0) },
- { I_vmaskmovps, { A_MEM, A_AVX, A_AVX }, F_AVX, E_MVR, 0x2e, VEX(256,0x66,0x0f38,W0) },
- { I_vmaskmovpd, { A_SSE, A_SSE, A_MEM }, F_AVX, E_RVM, 0x2d, VEX(128,0x66,0x0f38,W0) },
- { I_vmaskmovpd, { A_AVX, A_AVX, A_MEM }, F_AVX, E_RVM, 0x2d, VEX(256,0x66,0x0f38,W0) },
- { I_vmaskmovpd, { A_MEM, A_SSE, A_SSE }, F_AVX, E_MVR, 0x2f, VEX(128,0x66,0x0f38,W0) },
- { I_vmaskmovpd, { A_MEM, A_AVX, A_AVX }, F_AVX, E_MVR, 0x2f, VEX(256,0x66,0x0f38,W0) },
#define ALU_OPS(name, opc) \
{ name, { A_RM8, A_R8 }, F_386, E_MR, (opc << 3) + 0 }, \
@@ -394,91 +366,11 @@ static const variant_t variants[] =
{ I_imul3, { A_R32, A_RM32, A_I32 }, F_386, E_RM, 0x69 },
{ I_imul3, { A_R64, A_RM64, A_I32 }, F_386, E_RM, 0x69 },
- /* SIMD instructions */
- { I_palignr, { A_MMX, A_MMXM, A_I8 }, F_SSSE3, E_RM, 0x0f3a0f },
- { I_palignr, { A_SSE, A_SSEM, A_I8 }, F_SSSE3, E_RM, 0x0f3a0f, PRE_66 },
- { I_vpalignr, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0f, VEX(128,0x66,0x0f3a,WIG) },
-
-#define SIMD_FLOAT_OPS(name, opc) \
- { I_##name##ps, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc) }, \
- { I_v##name##ps, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x00,0x0f,WIG) }, \
- { I_v##name##ps, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, opc, VEX(256,0x00,0x0f,WIG) }, \
- { I_##name##pd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
- { I_v##name##pd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
- { I_v##name##pd, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }, \
- { I_##name##ss, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
- { I_v##name##ss, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(LIG,0xf3,0x0f,WIG) }, \
- { I_##name##sd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
- { I_v##name##sd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(LIG,0xf2,0x0f,WIG) }
-
- SIMD_FLOAT_OPS (add, 0x58),
- SIMD_FLOAT_OPS (and, 0x54),
- SIMD_FLOAT_OPS (andn, 0x55),
- SIMD_FLOAT_OPS (div, 0x5e),
- SIMD_FLOAT_OPS (max, 0x5f),
- SIMD_FLOAT_OPS (min, 0x5d),
- SIMD_FLOAT_OPS (mul, 0x59),
- SIMD_FLOAT_OPS (or, 0x56),
- SIMD_FLOAT_OPS (sub, 0x5c),
- SIMD_FLOAT_OPS (xor, 0x57),
-
- { I_hsubps, { A_SSE, A_SSEM }, F_SSE3, E_RM, 0x0f7d, PRE_F2 },
- { I_vhsubps, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x7d, VEX(128,0xf2,0x0f,WIG) },
- { I_vhsubps, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, 0x7d, VEX(256,0Xf2,0x0f,WIG) },
- { I_hsubpd, { A_SSE, A_SSEM }, F_SSE3, E_RM, 0x0f7d, PRE_66 },
- { I_vhsubpd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x7d, VEX(128,0x66,0x0f,WIG) },
- { I_vhsubpd, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, 0x7d, VEX(256,0x66,0x0f,WIG) },
-
-#define SIMD_FLOAT_OPS_IMM(name, opc) \
- { I_##name##ps, { A_SSE, A_SSEM, A_I8 }, F_SSE, E_RM, (0x0f00 + opc) }, \
- { I_v##name##ps, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x00,0x0f,WIG) }, \
- { I_v##name##ps, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x00,0x0f,WIG) }, \
- { I_##name##pd, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
- { I_v##name##pd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
- { I_v##name##pd, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }, \
- { I_##name##ss, { A_SSE, A_SSEM, A_I8 }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
- { I_v##name##ss, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(LIG,0xf3,0x0f,WIG) }, \
- { I_##name##sd, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
- { I_v##name##sd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(LIG,0xf2,0x0f,WIG) }
-
- SIMD_FLOAT_OPS_IMM (cmp, 0xc2),
- SIMD_FLOAT_OPS_IMM (shuf, 0xc6),
-
-#define SIMD_FLOAT_OPS_UNARY_SINGLE(name, opc) \
- { I_##name##ps, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc) }, \
- { I_v##name##ps, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(128,0x00,0x0f,WIG) }, \
- { I_v##name##ps, { A_AVX, A_AVXM }, F_AVX, E_RM, opc, VEX(256,0x00,0x0f,WIG) }, \
- { I_##name##ss, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
- { I_v##name##ss, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(LIG,0xf3,0x0f,WIG) }
-
-#define SIMD_FLOAT_OPS_UNARY_DOUBLE(name, opc) \
- { I_##name##pd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
- { I_v##name##pd, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(128,0x66,0x0f,WIG) }, \
- { I_v##name##pd, { A_AVX, A_AVXM }, F_AVX, E_RM, opc, VEX(256,0x66,0x0f,WIG) }, \
- { I_##name##sd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
- { I_v##name##sd, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(LIG,0xf2,0x0f,WIG) }
-
- SIMD_FLOAT_OPS_UNARY_SINGLE (sqrt, 0x51),
- SIMD_FLOAT_OPS_UNARY_DOUBLE (sqrt, 0x51),
- SIMD_FLOAT_OPS_UNARY_SINGLE (rsqrt, 0x52),
- SIMD_FLOAT_OPS_UNARY_SINGLE (rcp, 0x53),
-
-#define SIMD_FLOAT_OPSi(name, opc) \
- { I_##name##ps, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, (0x0f3a00 + opc), PRE_66 }, \
- { I_v##name##ps, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f3a,WIG) }, \
- { I_v##name##ps, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f3a,WIG) }, \
- \
- { I_##name##pd, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, (0x0f3a00 + opc + 1), PRE_66 }, \
- { I_v##name##pd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc + 1, VEX(128,0x66,0x0f3a,WIG) }, \
- { I_v##name##pd, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc + 1, VEX(256,0x66,0x0f3a,WIG) }
-
- SIMD_FLOAT_OPSi (blend, 0x0c),
-
- SIMD_FLOAT_OPSi (round, 0x08),
- { I_roundss, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, 0x0f3a0a, PRE_66 },
- { I_vroundss, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0a, VEX(LIG,0x66,0x0f3a,WIG) },
- { I_roundsd, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, 0x0f3a0b, PRE_66 },
- { I_vroundsd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0b, VEX(LIG,0x66,0x0f3a,WIG) },
+ /* MMX */
+ { I_movq, { A_MMX, A_MMXM }, F_MMX, E_RM, 0x0f6f, NO_REX_W },
+ { I_movq, { A_MMXM, A_MMX }, F_MMX, E_MR, 0x0f7f, NO_REX_W },
+ { I_movq, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f7e, PRE_F3 | NO_REX_W },
+ { I_movq, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0fd6, PRE_66 | NO_REX_W },
#define MMX_TRINARY(name, opc) \
{ I_##name, { A_MMX, A_MMXM }, F_MMX, E_RM, (0x0f00 + opc) }, \
@@ -542,42 +434,147 @@ static const variant_t variants[] =
MMX_SHIFT (psraw, 0xe1, 4),
MMX_SHIFT (psrad, 0xe2, 4),
-#define SIMD_0F_OPS(name, opc, mmx_flag) \
- { I_##name, { A_MMX, A_MMXM }, mmx_flag, E_RM, (0x0f00 + opc) }, \
+ /* pmaddubsw is special because it wasn't extended to SSE registers until SSSE3 */
+ { I_pmaddubsw, { A_MMX, A_MMXM }, F_MMX, E_RM, 0x0f3804 },
+ { I_pmaddubsw, { A_SSE, A_SSEM }, F_SSSE3, E_RM, 0x0f3804, PRE_66 },
+ { I_vpmaddubsw, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x04, VEX(128,0x66,0x0f38,WIG) },
+ { I_vpmaddubsw, { A_AVX, A_AVX, A_AVXM }, F_AVX2, E_RVM, 0x04, VEX(256,0x66,0x0f38,WIG) },
+
+ /* AMD's MMX extensions */
+#define MMX_EX_TRINARY(name, opc) \
+ { I_##name, { A_MMX, A_MMXM }, F_MMX_EX, E_RM, (0x0f00 + opc) }, \
{ I_##name, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
{ I_v##name, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
{ I_v##name, { A_AVX, A_AVX, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }
-
- /* AMD's MMX extensions */
- SIMD_0F_OPS (pavgb, 0xe0, F_MMX_EX),
- SIMD_0F_OPS (pavgw, 0xe3, F_MMX_EX),
- SIMD_0F_OPS (pmaxsw, 0xee, F_MMX_EX),
- SIMD_0F_OPS (pmaxub, 0xde, F_MMX_EX),
- SIMD_0F_OPS (pminsw, 0xea, F_MMX_EX),
- SIMD_0F_OPS (pminub, 0xda, F_MMX_EX),
- SIMD_0F_OPS (pmulhuw, 0xe4, F_MMX_EX),
- SIMD_0F_OPS (psadbw, 0xf6, F_MMX_EX),
-
+
+ MMX_EX_TRINARY (pavgb, 0xe0),
+ MMX_EX_TRINARY (pavgw, 0xe3),
+ MMX_EX_TRINARY (pmaxsw, 0xee),
+ MMX_EX_TRINARY (pmaxub, 0xde),
+ MMX_EX_TRINARY (pminsw, 0xea),
+ MMX_EX_TRINARY (pminub, 0xda),
+ MMX_EX_TRINARY (pmulhuw, 0xe4),
+ MMX_EX_TRINARY (psadbw, 0xf6),
+
+ /* pinsrw, pextrw are special because they don't have AVX2 variants and
+ * because SSE4.1 added new variants of pextrw. pmovmskb also doesn't
+ * have an AVX2 version, and it is binary.
+ */
{ I_pinsrw, { A_MMX, A_RM32, A_I8 }, F_MMX_EX, E_RM, 0x0fc4 },
{ I_pinsrw, { A_SSE, A_RM32, A_I8 }, F_SSE2, E_RM, 0x0fc4, PRE_66 },
{ I_vpinsrw, { A_SSE, A_SSE, A_RM32, A_I8 }, F_AVX, E_RVM, 0xc4, VEX(128,0x66,0x0f,W0) },
- /* No AVX2 version for pinsrw */
{ I_pextrw, { A_R, A_MMX, A_I8 }, F_MMX_EX, E_RM, 0x0fc5 },
{ I_pextrw, { A_R, A_SSE, A_I8 }, F_SSE2, E_RM, 0x0fc5, PRE_66 },
{ I_pextrw, { A_RM, A_SSE, A_I8 }, F_SSE41, E_MR, 0x0f3a15, PRE_66 },
{ I_vpextrw, { A_R, A_AVX, A_I8 }, F_AVX, E_RM, 0xc5, VEX(128,0x66,0x0f,W0) },
{ I_vpextrw, { A_RM, A_AVX, A_I8 }, F_AVX, E_MR, 0x15, VEX(128,0x66,0x0f3a,W0) },
- /* No AVX2 version for pextrw */
-
{ I_pmovmskb, { A_R, A_MMX }, F_MMX_EX, E_RM, 0x0fd7 },
{ I_pmovmskb, { A_R, A_SSE }, F_SSE2, E_RM, 0x0fd7, PRE_66 },
{ I_vpmovmskb, { A_R, A_SSE }, F_AVX, E_RM, 0xd7, VEX(128,0x66,0x0f,WIG) },
-
- /* MMX version appeared with SSE2 */
- SIMD_0F_OPS (paddq, 0xd4, F_SSE2),
- SIMD_0F_OPS (psubq, 0xfb, F_SSE2),
- SIMD_0F_OPS (pmuludq, 0xf4, F_SSE2),
+ /* SSE */
+#define SSE_TRINARY(name, opc) \
+ { I_##name##ps, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc) }, \
+ { I_v##name##ps, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x00,0x0f,WIG) }, \
+ { I_v##name##ps, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, opc, VEX(256,0x00,0x0f,WIG) }, \
+ { I_##name##pd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
+ { I_v##name##pd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
+ { I_v##name##pd, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }, \
+ { I_##name##ss, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
+ { I_v##name##ss, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(LIG,0xf3,0x0f,WIG) }, \
+ { I_##name##sd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
+ { I_v##name##sd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(LIG,0xf2,0x0f,WIG) }
+
+ SSE_TRINARY (add, 0x58),
+ SSE_TRINARY (and, 0x54),
+ SSE_TRINARY (andn, 0x55),
+ SSE_TRINARY (div, 0x5e),
+ SSE_TRINARY (max, 0x5f),
+ SSE_TRINARY (min, 0x5d),
+ SSE_TRINARY (mul, 0x59),
+ SSE_TRINARY (or, 0x56),
+ SSE_TRINARY (sub, 0x5c),
+ SSE_TRINARY (xor, 0x57),
+
+#define SSE_TRINARY_IMM(name, opc) \
+ { I_##name##ps, { A_SSE, A_SSEM, A_I8 }, F_SSE, E_RM, (0x0f00 + opc) }, \
+ { I_v##name##ps, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x00,0x0f,WIG) }, \
+ { I_v##name##ps, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x00,0x0f,WIG) }, \
+ { I_##name##pd, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
+ { I_v##name##pd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
+ { I_v##name##pd, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }, \
+ { I_##name##ss, { A_SSE, A_SSEM, A_I8 }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
+ { I_v##name##ss, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(LIG,0xf3,0x0f,WIG) }, \
+ { I_##name##sd, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
+ { I_v##name##sd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(LIG,0xf2,0x0f,WIG) }
+
+ SSE_TRINARY_IMM (cmp, 0xc2),
+ SSE_TRINARY_IMM (shuf, 0xc6),
+
+#define SSE_UNARY_SINGLE(name, opc) \
+ { I_##name##ps, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc) }, \
+ { I_v##name##ps, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(128,0x00,0x0f,WIG) }, \
+ { I_v##name##ps, { A_AVX, A_AVXM }, F_AVX, E_RM, opc, VEX(256,0x00,0x0f,WIG) }, \
+ { I_##name##ss, { A_SSE, A_SSEM }, F_SSE, E_RM, (0x0f00 + opc), PRE_F3 }, \
+ { I_v##name##ss, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(LIG,0xf3,0x0f,WIG) }
+
+#define SSE_UNARY_DOUBLE(name, opc) \
+ { I_##name##pd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
+ { I_v##name##pd, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(128,0x66,0x0f,WIG) }, \
+ { I_v##name##pd, { A_AVX, A_AVXM }, F_AVX, E_RM, opc, VEX(256,0x66,0x0f,WIG) }, \
+ { I_##name##sd, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_F2 }, \
+ { I_v##name##sd, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(LIG,0xf2,0x0f,WIG) }
+
+ SSE_UNARY_SINGLE (sqrt, 0x51),
+ SSE_UNARY_DOUBLE (sqrt, 0x51),
+ SSE_UNARY_SINGLE (rsqrt, 0x52),
+ SSE_UNARY_SINGLE (rcp, 0x53),
+
+ /* SSE 2 */
+ { I_movq2dq, { A_SSE, A_MMX }, F_SSE2, E_RM, 0x0fd6, PRE_F3 | NO_REX_W },
+ { I_movdqa, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f6f, PRE_66 },
+ { I_movdqa, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0f7f, PRE_66 },
+ { I_vmovdqa, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x6f, VEX(128,0x66,0x0f,WIG) },
+ { I_vmovdqa, { A_SSEM, A_SSE }, F_AVX, E_MR, 0x7f, VEX(128,0x66,0x0f,WIG) },
+ { I_vmovdqa, { A_AVX, A_AVXM }, F_AVX, E_RM, 0x6f, VEX(256,0x66,0x0f,WIG) },
+ { I_vmovdqa, { A_AVXM, A_AVX }, F_AVX, E_MR, 0x7f, VEX(256,0x66,0x0f,WIG) },
+ { I_movdqu, { A_SSE, A_SSEM }, F_SSE2, E_RM, 0x0f6f, PRE_F3 },
+ { I_movdqu, { A_SSEM, A_SSE }, F_SSE2, E_MR, 0x0f7f, PRE_F3 },
+ { I_vmovdqu, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x6f, VEX(128,0xf3,0x0f,WIG) },
+ { I_vmovdqu, { A_SSEM, A_SSE }, F_AVX, E_MR, 0x7f, VEX(128,0xf3,0x0f,WIG) },
+ { I_vmovdqu, { A_AVX, A_AVXM }, F_AVX, E_RM, 0x6f, VEX(256,0xf3,0x0f,WIG) },
+ { I_vmovdqu, { A_AVXM, A_AVX }, F_AVX, E_MR, 0x7f, VEX(256,0xf3,0x0f,WIG) },
+
+#define SSE2_TRINARY(name, opc) \
+ { I_##name, { A_MMX, A_MMXM }, F_SSE2, E_RM, (0x0f00 + opc) }, \
+ { I_##name, { A_SSE, A_SSEM }, F_SSE2, E_RM, (0x0f00 + opc), PRE_66 }, \
+ { I_v##name, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f,WIG) }, \
+ { I_v##name, { A_AVX, A_AVX, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f,WIG) }
+
+ SSE2_TRINARY (paddq, 0xd4),
+ SSE2_TRINARY (psubq, 0xfb),
+ SSE2_TRINARY (pmuludq, 0xf4),
+
+ /* These don't have MMX variants */
+ { I_pshufhw, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, 0x0f70, PRE_F3 },
+ { I_vpshufhw, { A_SSE, A_SSEM, A_I8 }, F_AVX, E_RM, 0x70, VEX(128,0xf3,0x0f,WIG) },
+ { I_vpshufhw, { A_AVX, A_AVXM, A_I8 }, F_AVX2, E_RM, 0x70, VEX(256,0xf3,0x0f,WIG) },
+ { I_pshuflw, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, 0x0f70, PRE_F2 },
+ { I_vpshuflw, { A_SSE, A_SSEM, A_I8 }, F_AVX, E_RM, 0x70, VEX(128,0xf2,0x0f,WIG) },
+ { I_vpshuflw, { A_AVX, A_AVXM, A_I8 }, F_AVX2, E_RM, 0x70, VEX(256,0xf2,0x0f,WIG) },
+
+ /* SSE3 */
+ { I_lddqu, { A_SSE, A_MEM }, F_SSE3, E_RM, 0x0ff0, PRE_F2 },
+ { I_vlddqu, { A_SSE, A_MEM }, F_AVX, E_RM, 0xf0, VEX(128,0xf2,0x0f,WIG) },
+ { I_vlddqu, { A_AVX, A_MEM }, F_AVX, E_RM, 0xf0, VEX(256,0xf2,0x0f,WIG) },
+ { I_hsubps, { A_SSE, A_SSEM }, F_SSE3, E_RM, 0x0f7d, PRE_F2 },
+ { I_vhsubps, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x7d, VEX(128,0xf2,0x0f,WIG) },
+ { I_vhsubps, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, 0x7d, VEX(256,0xf2,0x0f,WIG) },
+ { I_hsubpd, { A_SSE, A_SSEM }, F_SSE3, E_RM, 0x0f7d, PRE_66 },
+ { I_vhsubpd, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x7d, VEX(128,0x66,0x0f,WIG) },
+ { I_vhsubpd, { A_AVX, A_AVX, A_AVXM }, F_AVX, E_RVM, 0x7d, VEX(256,0x66,0x0f,WIG) },
+
+ /* SSSE 3 */
#define SSSE3_BINARY(name, opc) \
{ I_##name, { A_MMX, A_MMXM }, F_SSSE3, E_RM, (0x0f3800 + opc) }, \
{ I_##name, { A_SSE, A_SSEM }, F_SSSE3, E_RM, (0x0f3800 + opc), PRE_66 }, \
@@ -591,8 +588,8 @@ static const variant_t variants[] =
#define SSSE3_TRINARY(name, opc) \
{ I_##name, { A_MMX, A_MMXM }, F_SSSE3, E_RM, (0x0f3800 + opc) }, \
{ I_##name, { A_SSE, A_SSEM }, F_SSSE3, E_RM, (0x0f3800 + opc), PRE_66 }, \
- { I_v##name, { A_SSE, A_SSEM, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f38,WIG) }, \
- { I_v##name, { A_AVX, A_SSEM, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f38,WIG) }
+ { I_v##name, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f38,WIG) }, \
+ { I_v##name, { A_AVX, A_SSE, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f38,WIG) }
SSSE3_TRINARY (psignb, 0x08),
SSSE3_TRINARY (psignw, 0x09),
@@ -606,17 +603,21 @@ static const variant_t variants[] =
SSSE3_TRINARY (pmulhrsw, 0x0b),
SSSE3_TRINARY (pshufb, 0x00),
+ { I_palignr, { A_MMX, A_MMXM, A_I8 }, F_SSSE3, E_RM, 0x0f3a0f },
+ { I_palignr, { A_SSE, A_SSEM, A_I8 }, F_SSSE3, E_RM, 0x0f3a0f, PRE_66 },
+ { I_vpalignr, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0f, VEX(128,0x66,0x0f3a,WIG) },
+
+ /* SSE 4.1 */
#define SSE41_BINARY(name, opc) \
{ I_##name, { A_SSE, A_SSEM }, F_SSE41, E_RM, (0x0f3800 + opc), PRE_66 }, \
{ I_v##name, { A_SSE, A_SSEM }, F_AVX, E_RM, opc, VEX(128,0x66,0x0f38,WIG) }, \
{ I_v##name, { A_AVX, A_AVXM }, F_AVX2, E_RM, opc, VEX(256,0x66,0x0f38,WIG) }
- SSE41_BINARY (ptest, 0x17),
-
#define SSE41_PMOVX(type, opc) \
- SSE41_BINARY (pmovsx##type, opc), \
+ SSE41_BINARY (pmovsx##type, opc), \
SSE41_BINARY (pmovzx##type, opc + 0x10)
+ SSE41_BINARY (ptest, 0x17),
SSE41_PMOVX (bw, 0x20),
SSE41_PMOVX (bd, 0x21),
SSE41_PMOVX (bq, 0x22),
@@ -624,10 +625,14 @@ static const variant_t variants[] =
SSE41_PMOVX (wq, 0x24),
SSE41_PMOVX (dq, 0x25),
+ /* No AVX2 variant for phminposuw */
+ { I_phminposuw, { A_SSE, A_SSEM }, F_SSE41, E_RM, 0x0f3841, PRE_66 },
+ { I_vphminposuw, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x41, VEX(128,0x66,0x0f38,WIG) },
+
#define SSE41_TRINARY(name, opc) \
{ I_##name, { A_SSE, A_SSEM }, F_SSE41, E_RM, (0x0f3800 + opc), PRE_66 }, \
- { I_v##name, { A_SSE, A_SSEM, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f38,WIG) }, \
- { I_v##name, { A_AVX, A_SSEM, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f38,WIG) }
+ { I_v##name, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f38,WIG) }, \
+ { I_v##name, { A_AVX, A_SSE, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f38,WIG) }
SSE41_TRINARY (packusdw, 0x2b),
SSE41_TRINARY (pcmpeqq, 0x29),
@@ -642,31 +647,37 @@ static const variant_t variants[] =
SSE41_TRINARY (pmuldq, 0x28),
SSE41_TRINARY (pmulld, 0x40),
- { I_phminposuw, { A_SSE, A_SSEM }, F_SSE41, E_RM, 0x0f3841, PRE_66 },
- { I_vphminposuw, { A_SSE, A_SSEM }, F_AVX, E_RM, 0x41, VEX(128,0x66,0x0f38,WIG) },
- /* No AVX2 variant for phminposuw */
-
+#define SSE41_TRINARY_IMM(name, opc) \
+ { I_##name##ps, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, (0x0f3a00 + opc), PRE_66 }, \
+ { I_v##name##ps, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f3a,WIG) }, \
+ { I_v##name##ps, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc, VEX(256,0x66,0x0f3a,WIG) }, \
+ { I_##name##pd, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, (0x0f3a00 + opc + 1), PRE_66 }, \
+ { I_v##name##pd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, opc + 1, VEX(128,0x66,0x0f3a,WIG) }, \
+ { I_v##name##pd, { A_AVX, A_AVX, A_AVXM, A_I8 }, F_AVX, E_RVM, opc + 1, VEX(256,0x66,0x0f3a,WIG) }
- /* SSE 42 */
-#define SSE42_TRINARY(name, opc) \
- { I_##name, { A_SSE, A_SSEM }, F_SSE42, E_RM, (0x0f3800 + opc), PRE_66 }, \
- { I_v##name, { A_SSE, A_SSEM, A_SSEM }, F_AVX, E_RVM, opc, VEX(128,0x66,0x0f38,WIG) }, \
- { I_v##name, { A_AVX, A_SSEM, A_AVXM }, F_AVX2, E_RVM, opc, VEX(256,0x66,0x0f38,WIG) }
+ SSE41_TRINARY_IMM (blend, 0x0c),
+ SSE41_TRINARY_IMM (round, 0x08),
+ { I_roundss, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, 0x0f3a0a, PRE_66 },
+ { I_vroundss, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0a, VEX(LIG,0x66,0x0f3a,WIG) },
+ { I_roundsd, { A_SSE, A_SSEM, A_I8 }, F_SSE41, E_RM, 0x0f3a0b, PRE_66 },
+ { I_vroundsd, { A_SSE, A_SSE, A_SSEM, A_I8 }, F_AVX, E_RVM, 0x0b, VEX(LIG,0x66,0x0f3a,WIG) },
- SSE42_TRINARY (pcmpgtq, 0x37),
+ /* SSE 4.2 */
+ { I_pcmpgtq, { A_SSE, A_SSEM }, F_SSE42, E_RM, 0x0f3837, PRE_66 },
+ { I_vpcmpgtq, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x37, VEX(128,0x66,0x0f38,WIG) },
+ { I_vpcmpgtq, { A_AVX, A_AVX, A_AVXM }, F_AVX2, E_RVM, 0x37, VEX(256,0x66,0x0f38,WIG) },
- { I_pmaddubsw, { A_MMX, A_MMXM }, F_MMX, E_RM, 0x0f3804 },
- { I_pmaddubsw, { A_SSE, A_SSEM }, F_SSSE3, E_RM, 0x0f3804, PRE_66 },
- { I_vpmaddubsw, { A_SSE, A_SSE, A_SSEM }, F_AVX, E_RVM, 0x04, VEX(128,0x66,0x0f38,WIG) },
- { I_vpmaddubsw, { A_AVX, A_AVX, A_AVXM }, F_AVX2, E_RVM, 0x04, VEX(256,0x66,0x0f38,WIG) },
+ /* AVX */
+ { I_vmaskmovps, { A_SSE, A_SSE, A_MEM }, F_AVX, E_RVM, 0x2c, VEX(128,0x66,0x0f38,W0) },
+ { I_vmaskmovps, { A_AVX, A_AVX, A_MEM }, F_AVX, E_RVM, 0x2c, VEX(256,0x66,0x0f38,W0) },
+ { I_vmaskmovps, { A_MEM, A_SSE, A_SSE }, F_AVX, E_MVR, 0x2e, VEX(128,0x66,0x0f38,W0) },
+ { I_vmaskmovps, { A_MEM, A_AVX, A_AVX }, F_AVX, E_MVR, 0x2e, VEX(256,0x66,0x0f38,W0) },
+ { I_vmaskmovpd, { A_SSE, A_SSE, A_MEM }, F_AVX, E_RVM, 0x2d, VEX(128,0x66,0x0f38,W0) },
+ { I_vmaskmovpd, { A_AVX, A_AVX, A_MEM }, F_AVX, E_RVM, 0x2d, VEX(256,0x66,0x0f38,W0) },
+ { I_vmaskmovpd, { A_MEM, A_SSE, A_SSE }, F_AVX, E_MVR, 0x2f, VEX(128,0x66,0x0f38,W0) },
+ { I_vmaskmovpd, { A_MEM, A_AVX, A_AVX }, F_AVX, E_MVR, 0x2f, VEX(256,0x66,0x0f38,W0) },
- { I_pshufhw, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, 0x0f70, PRE_F3 },
- { I_vpshufhw, { A_SSE, A_SSEM, A_I8 }, F_AVX, E_RM, 0x70, VEX(128,0xf3,0x0f,WIG) },
- { I_vpshufhw, { A_AVX, A_AVXM, A_I8 }, F_AVX2, E_RM, 0x70, VEX(256,0xf3,0x0f,WIG) },
- { I_pshuflw, { A_SSE, A_SSEM, A_I8 }, F_SSE2, E_RM, 0x0f70, PRE_F2 },
- { I_vpshuflw, { A_SSE, A_SSEM, A_I8 }, F_AVX, E_RM, 0x70, VEX(128,0xf2,0x0f,WIG) },
- { I_vpshuflw, { A_AVX, A_AVXM, A_I8 }, F_AVX2, E_RM, 0x70, VEX(256,0xf2,0x0f,WIG) },
-
+ /* CVT16 */
{ I_vcvtph2ps, { A_SSE, A_SSEM }, F_CVT16, E_RM, 0x13, VEX(128,0x66,0x0f38,W0) },
{ I_vcvtph2ps, { A_AVX, A_SSEM }, F_CVT16, E_RM, 0x13, VEX(256,0x66,0x0f38,W0) },
{ I_vcvtps2ph, { A_SSEM, A_AVX, A_I8 }, F_CVT16, E_MR, 0x1d, VEX(256,0x66,0x0f3a,W0) },
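
For context, each variants[] entry above pairs an instruction with its operand classes, the minimum feature level (F_MMX, F_SSE2, F_SSSE3, F_AVX, ...), an encoding form (E_RM, E_MR, E_RVM, ...), the opcode, and prefix flags. Below is a minimal sketch of how such a table could be scanned to pick an encoding; the struct layout shown, the cpu_has() helper, and find_variant() are illustrative assumptions, not the actual simplex86.c code.

/* Hypothetical sketch (not taken from simplex86.c): scanning a
 * variants[]-style table for an encoding that matches the requested
 * instruction and operand classes and is supported by the target CPU.
 */
#include <stdbool.h>
#include <stddef.h>

typedef struct
{
    int      instruction;    /* I_movq, I_paddq, ...             */
    int      operands[4];    /* A_SSE, A_SSEM, A_I8, ...         */
    unsigned feature;        /* F_MMX, F_SSE2, F_AVX, ...        */
    int      encoding;       /* E_RM, E_MR, E_RVM, ...           */
    unsigned opcode;
    unsigned flags;          /* PRE_66, NO_REX_W, VEX(...), ...  */
} variant_t;

/* Assumed helper: does the target CPU provide this feature level? */
extern bool cpu_has (unsigned feature);

static const variant_t *
find_variant (const variant_t *table, size_t n_variants,
              int instruction, const int *operands, int n_operands)
{
    size_t i;

    for (i = 0; i < n_variants; ++i)
    {
        const variant_t *v = &table[i];
        bool match = true;
        int j;

        if (v->instruction != instruction || !cpu_has (v->feature))
            continue;

        /* Real matching would be looser, e.g. A_SSEM accepting either
         * an SSE register or a memory operand; exact equality is used
         * here only to keep the sketch short.
         */
        for (j = 0; j < n_operands; ++j)
        {
            if (v->operands[j] != operands[j])
            {
                match = false;
                break;
            }
        }

        if (match)
            return v;        /* first supported match wins */
    }

    return NULL;             /* no encoding available on this CPU */
}

Grouping the entries by SSE level, as this commit does, keeps each feature tier contiguous, which makes the table easier to audit against the instruction set reference.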