perf tools: Add AVX-512 support to the instruction decoder used by Intel PT

Add support for Intel's AVX-512 instructions to perf tools instruction
decoder used by Intel PT.  The kernel's instruction decoder was updated in
a previous patch.

AVX-512 instructions are documented in Intel Architecture Instruction Set
Extensions Programming Reference (February 2016).

AVX-512 instructions are identified by a EVEX prefix which, for the purpose
of instruction decoding, can be treated as though it were a 4-byte VEX
prefix.

Existing instructions which can now accept an EVEX prefix need not be
further annotated in the op code map (x86-opcode-map.txt). In the case of
new instructions, the op code map is updated accordingly.

Also add associated Mask Instructions that are used to manipulate mask
registers used in AVX-512 instructions.

A representative set of instructions is added to the perf tools new
instructions test in a subsequent patch.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: X86 ML <x86@kernel.org>
Link: http://lkml.kernel.org/r/1469003437-32706-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Adrian Hunter 2016-07-20 11:30:36 +03:00 committed by Arnaldo Carvalho de Melo
parent 25af37f4e1
commit c61f4d5eba
5 changed files with 220 additions and 101 deletions

View File

@ -72,12 +72,14 @@ BEGIN {
lprefix_expr = "\\((66|F2|F3)\\)"
max_lprefix = 4
# All opcodes starting with lower-case 'v' or with (v1) superscript
# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
# accepts VEX prefix
vexok_opcode_expr = "^v.*"
vexok_opcode_expr = "^[vk].*"
vexok_expr = "\\(v1\\)"
# All opcodes with (v) superscript supports *only* VEX prefix
vexonly_expr = "\\(v\\)"
# All opcodes with (ev) superscript supports *only* EVEX prefix
evexonly_expr = "\\(ev\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@ -95,6 +97,7 @@ BEGIN {
prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
prefix_num["EVEX"] = "INAT_PFX_EVEX"
clear_vars()
}
@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
flags = add_flags(flags, "INAT_MODRM")
# check VEX codes
if (match(ext, vexonly_expr))
if (match(ext, evexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
else if (match(ext, vexonly_expr))
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")

View File

@ -48,6 +48,7 @@
/* AVX VEX prefixes */
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
#define INAT_PFX_EVEX 15 /* EVEX prefix */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@ -89,6 +90,7 @@
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr)
static inline int inat_is_vex_prefix(insn_attr_t attr)
{
attr &= INAT_PFX_MASK;
return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
attr == INAT_PFX_EVEX;
}
static inline int inat_is_evex_prefix(insn_attr_t attr)
{
return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
}
static inline int inat_is_vex3_prefix(insn_attr_t attr)
@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & INAT_VEXONLY;
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
}
static inline int inat_must_evex(insn_attr_t attr)
{
return attr & INAT_EVEXONLY;
}
#endif

View File

@ -155,14 +155,24 @@ found:
/*
* In 32-bits mode, if the [7:6] bits (mod bits of
* ModRM) on the second byte are not 11b, it is
* LDS or LES.
* LDS or LES or BOUND.
*/
if (X86_MODRM_MOD(b2) != 3)
goto vex_end;
}
insn->vex_prefix.bytes[0] = b;
insn->vex_prefix.bytes[1] = b2;
if (inat_is_vex3_prefix(attr)) {
if (inat_is_evex_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn->vex_prefix.bytes[2] = b2;
b2 = peek_nbyte_next(insn_byte_t, insn, 3);
insn->vex_prefix.bytes[3] = b2;
insn->vex_prefix.nbytes = 4;
insn->next_byte += 4;
if (insn->x86_64 && X86_VEX_W(b2))
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else if (inat_is_vex3_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn->vex_prefix.bytes[2] = b2;
insn->vex_prefix.nbytes = 3;
@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn)
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
(!inat_accept_vex(insn->attr) &&
!inat_is_group(insn->attr)))
insn->attr = 0; /* This instruction is bad */
goto end; /* VEX has only 1 byte for opcode */
}

View File

@ -91,6 +91,7 @@ struct insn {
#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
/* VEX bit fields */
#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
#define X86_VEX2_M 1 /* VEX2.M always 1 */
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn)
return (insn->vex_prefix.value != 0);
}
static inline int insn_is_evex(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
return (insn->vex_prefix.nbytes == 4);
}
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
return X86_VEX2_M;
else
else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */
return X86_VEX3_M(insn->vex_prefix.bytes[1]);
else /* EVEX */
return X86_EVEX_M(insn->vex_prefix.bytes[1]);
}
static inline insn_byte_t insn_vex_p_bits(struct insn *insn)

View File

@ -13,12 +13,17 @@
# opcode: escape # escaped-name
# EndTable
#
# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
# mnemonics that begin with lowercase 'k' accept a VEX prefix
#
#<group maps>
# GrpTable: GrpXXX
# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
# EndTable
#
# AVX Superscripts
# (ev): this opcode requires EVEX prefix.
# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
# (v): this opcode requires VEX prefix.
# (v1): this opcode only supports 128bit VEX.
#
@ -137,7 +142,7 @@ AVXcode:
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
62: BOUND Gv,Ma (i64)
62: BOUND Gv,Ma (i64) | EVEX (Prefix)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
@ -399,17 +404,17 @@ AVXcode: 1
3f:
# 0x0f 0x40-0x4f
40: CMOVO Gv,Ev
41: CMOVNO Gv,Ev
42: CMOVB/C/NAE Gv,Ev
41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
43: CMOVAE/NB/NC Gv,Ev
44: CMOVE/Z Gv,Ev
45: CMOVNE/NZ Gv,Ev
46: CMOVBE/NA Gv,Ev
47: CMOVA/NBE Gv,Ev
44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
48: CMOVS Gv,Ev
49: CMOVNS Gv,Ev
4a: CMOVP/PE Gv,Ev
4b: CMOVNP/PO Gv,Ev
4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
4c: CMOVL/NGE Gv,Ev
4d: CMOVNL/GE Gv,Ev
4e: CMOVLE/NG Gv,Ev
@ -426,7 +431,7 @@ AVXcode: 1
58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
@ -447,7 +452,7 @@ AVXcode: 1
6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
# 0x0f 0x70-0x7f
70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
71: Grp12 (1A)
@ -458,14 +463,14 @@ AVXcode: 1
76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
# Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
77: emms | vzeroupper | vzeroall
78: VMREAD Ey,Gy
79: VMWRITE Gy,Ey
7a:
7b:
78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
@ -485,16 +490,16 @@ AVXcode: 1
8e: JLE/JNG Jz (f64)
8f: JNLE/JG Jz (f64)
# 0x0f 0x90-0x9f
90: SETO Eb
91: SETNO Eb
92: SETB/C/NAE Eb
93: SETAE/NB/NC Eb
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
94: SETE/Z Eb
95: SETNE/NZ Eb
96: SETBE/NA Eb
97: SETA/NBE Eb
98: SETS Eb
99: SETNS Eb
98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
9a: SETP/PE Eb
9b: SETNP/PO Eb
9c: SETL/NGE Eb
@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
# 0x0f 0xe0-0xef
e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
# 0x0f 0xf0-0xff
f0: vlddqu Vx,Mx (F2)
f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
@ -626,81 +631,105 @@ AVXcode: 2
0e: vtestps Vx,Wx (66),(v)
0f: vtestpd Vx,Wx (66),(v)
# 0x0f 0x38 0x10-0x1f
10: pblendvb Vdq,Wdq (66)
11:
12:
13: vcvtph2ps Vx,Wx (66),(v)
14: blendvps Vdq,Wdq (66)
15: blendvpd Vdq,Wdq (66)
16: vpermps Vqq,Hqq,Wqq (66),(v)
10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
17: vptest Vx,Wx (66)
18: vbroadcastss Vx,Wd (66),(v)
19: vbroadcastsd Vqq,Wq (66),(v)
1a: vbroadcastf128 Vqq,Mdq (66),(v)
1b:
19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
1f:
1f: vpabsq Vx,Wx (66),(ev)
# 0x0f 0x38 0x20-0x2f
20: vpmovsxbw Vx,Ux/Mq (66),(v1)
21: vpmovsxbd Vx,Ux/Md (66),(v1)
22: vpmovsxbq Vx,Ux/Mw (66),(v1)
23: vpmovsxwd Vx,Ux/Mq (66),(v1)
24: vpmovsxwq Vx,Ux/Md (66),(v1)
25: vpmovsxdq Vx,Ux/Mq (66),(v1)
26:
27:
28: vpmuldq Vx,Hx,Wx (66),(v1)
29: vpcmpeqq Vx,Hx,Wx (66),(v1)
2a: vmovntdqa Vx,Mx (66),(v1)
20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
2b: vpackusdw Vx,Hx,Wx (66),(v1)
2c: vmaskmovps Vx,Hx,Mx (66),(v)
2d: vmaskmovpd Vx,Hx,Mx (66),(v)
2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
2e: vmaskmovps Mx,Hx,Vx (66),(v)
2f: vmaskmovpd Mx,Hx,Vx (66),(v)
# 0x0f 0x38 0x30-0x3f
30: vpmovzxbw Vx,Ux/Mq (66),(v1)
31: vpmovzxbd Vx,Ux/Md (66),(v1)
32: vpmovzxbq Vx,Ux/Mw (66),(v1)
33: vpmovzxwd Vx,Ux/Mq (66),(v1)
34: vpmovzxwq Vx,Ux/Md (66),(v1)
35: vpmovzxdq Vx,Ux/Mq (66),(v1)
36: vpermd Vqq,Hqq,Wqq (66),(v)
30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
37: vpcmpgtq Vx,Hx,Wx (66),(v1)
38: vpminsb Vx,Hx,Wx (66),(v1)
39: vpminsd Vx,Hx,Wx (66),(v1)
3a: vpminuw Vx,Hx,Wx (66),(v1)
3b: vpminud Vx,Hx,Wx (66),(v1)
38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
3c: vpmaxsb Vx,Hx,Wx (66),(v1)
3d: vpmaxsd Vx,Hx,Wx (66),(v1)
3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
3e: vpmaxuw Vx,Hx,Wx (66),(v1)
3f: vpmaxud Vx,Hx,Wx (66),(v1)
3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
# 0x0f 0x38 0x40-0x8f
40: vpmulld Vx,Hx,Wx (66),(v1)
40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
41: vphminposuw Vdq,Wdq (66),(v1)
42:
43:
44:
42: vgetexpps/d Vx,Wx (66),(ev)
43: vgetexpss/d Vx,Hx,Wx (66),(ev)
44: vplzcntd/q Vx,Wx (66),(ev)
45: vpsrlvd/q Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v)
46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
47: vpsllvd/q Vx,Hx,Wx (66),(v)
# Skip 0x48-0x57
# Skip 0x48-0x4b
4c: vrcp14ps/d Vpd,Wpd (66),(ev)
4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
# Skip 0x50-0x57
58: vpbroadcastd Vx,Wx (66),(v)
59: vpbroadcastq Vx,Wx (66),(v)
5a: vbroadcasti128 Vqq,Mdq (66),(v)
# Skip 0x5b-0x77
59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
# Skip 0x5c-0x63
64: vpblendmd/q Vx,Hx,Wx (66),(ev)
65: vblendmps/d Vx,Hx,Wx (66),(ev)
66: vpblendmb/w Vx,Hx,Wx (66),(ev)
# Skip 0x67-0x74
75: vpermi2b/w Vx,Hx,Wx (66),(ev)
76: vpermi2d/q Vx,Hx,Wx (66),(ev)
77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
78: vpbroadcastb Vx,Wx (66),(v)
79: vpbroadcastw Vx,Wx (66),(v)
# Skip 0x7a-0x7f
7a: vpbroadcastb Vx,Rv (66),(ev)
7b: vpbroadcastw Vx,Rv (66),(ev)
7c: vpbroadcastd/q Vx,Rv (66),(ev)
7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
81: INVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
89: vpexpandd/q Vx,Wx (66),(ev)
8a: vcompressps/d Wx,Vx (66),(ev)
8b: vpcompressd/q Wx,Vx (66),(ev)
8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
8d: vpermb/w Vx,Hx,Wx (66),(ev)
8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
# 0x0f 0x38 0x90-0xbf (FMA)
90: vgatherdd/q Vx,Hx,Wx (66),(v)
91: vgatherqd/q Vx,Hx,Wx (66),(v)
90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
92: vgatherdps/d Vx,Hx,Wx (66),(v)
93: vgatherqps/d Vx,Hx,Wx (66),(v)
94:
@ -715,6 +744,10 @@ AVXcode: 2
9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
a0: vpscatterdd/q Wx,Vx (66),(ev)
a1: vpscatterqd/q Wx,Vx (66),(ev)
a2: vscatterdps/d Wx,Vx (66),(ev)
a3: vscatterqps/d Wx,Vx (66),(ev)
a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
# 0x0f 0x38 0xc0-0xff
c8: sha1nexte Vdq,Wdq
c4: vpconflictd/q Vx,Wx (66),(ev)
c6: Grp18 (1A)
c7: Grp19 (1A)
c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
c9: sha1msg1 Vdq,Wdq
ca: sha1msg2 Vdq,Wdq
cb: sha256rnds2 Vdq,Wdq
cc: sha256msg1 Vdq,Wdq
cd: sha256msg2 Vdq,Wdq
ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
db: VAESIMC Vdq,Wdq (66),(v1)
dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@ -763,15 +801,15 @@ AVXcode: 3
00: vpermq Vqq,Wqq,Ib (66),(v)
01: vpermpd Vqq,Wqq,Ib (66),(v)
02: vpblendd Vx,Hx,Wx,Ib (66),(v)
03:
03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
04: vpermilps Vx,Wx,Ib (66),(v)
05: vpermilpd Vx,Wx,Ib (66),(v)
06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
07:
08: vroundps Vx,Wx,Ib (66)
09: vroundpd Vx,Wx,Ib (66)
0a: vroundss Vss,Wss,Ib (66),(v1)
0b: vroundsd Vsd,Wsd,Ib (66),(v1)
08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
0c: vblendps Vx,Hx,Wx,Ib (66)
0d: vblendpd Vx,Hx,Wx,Ib (66)
0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
@ -780,26 +818,51 @@ AVXcode: 3
15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
16: vpextrd/q Ey,Vdq,Ib (66),(v1)
17: vextractps Ed,Vdq,Ib (66),(v1)
18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
19: vextractf128 Wdq,Vqq,Ib (66),(v)
18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
1d: vcvtps2ph Wx,Vx,Ib (66),(v)
1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
39: vextracti128 Wdq,Vqq,Ib (66),(v)
23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
26: vgetmantps/d Vx,Wx,Ib (66),(ev)
27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
30: kshiftrb/w Vk,Uk,Ib (66),(v)
31: kshiftrd/q Vk,Uk,Ib (66),(v)
32: kshiftlb/w Vk,Uk,Ib (66),(v)
33: kshiftld/q Vk,Uk,Ib (66),(v)
38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
40: vdpps Vx,Hx,Wx,Ib (66)
41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
56: vreduceps/d Vx,Wx,Ib (66),(ev)
57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
66: vfpclassps/d Vk,Wx,Ib (66),(ev)
67: vfpclassss/d Vk,Wx,Ib (66),(ev)
cc: sha1rnds4 Vdq,Wdq,Ib
df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
f0: RORX Gy,Ey,Ib (F2),(v)
@ -927,8 +990,10 @@ GrpTable: Grp12
EndTable
GrpTable: Grp13
0: vprord/q Hx,Wx,Ib (66),(ev)
1: vprold/q Hx,Wx,Ib (66),(ev)
2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
EndTable
@ -963,6 +1028,20 @@ GrpTable: Grp17
3: BLSI By,Ey (v)
EndTable
GrpTable: Grp18
1: vgatherpf0dps/d Wx (66),(ev)
2: vgatherpf1dps/d Wx (66),(ev)
5: vscatterpf0dps/d Wx (66),(ev)
6: vscatterpf1dps/d Wx (66),(ev)
EndTable
GrpTable: Grp19
1: vgatherpf0qps/d Wx (66),(ev)
2: vgatherpf1qps/d Wx (66),(ev)
5: vscatterpf0qps/d Wx (66),(ev)
6: vscatterpf1qps/d Wx (66),(ev)
EndTable
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH